diff --git a/VERSION b/VERSION
index 530cdd9..276cbf9 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.2.4
+2.3.0
diff --git a/bin/ngt/ngt.cpp b/bin/ngt/ngt.cpp
index 1403c8b..8572721 100644
--- a/bin/ngt/ngt.cpp
+++ b/bin/ngt/ngt.cpp
@@ -122,6 +122,8 @@ main(int argc, char **argv)
     }
   } catch(NGT::Exception &err) {
     cerr << "ngt: Error: " << err.what() << endl;
+    return 1;
   }
+  return 0;
 
 }
diff --git a/bin/qbg/qbg.cpp b/bin/qbg/qbg.cpp
index 92281eb..a6c8fc5 100644
--- a/bin/qbg/qbg.cpp
+++ b/bin/qbg/qbg.cpp
@@ -23,7 +23,16 @@ main(int argc, char **argv)
 
   QBG::CLI ngt;
 
-  ngt.execute(args);
+  try {
+    ngt.execute(args);
+  } catch(NGT::Exception &err) {
+    cerr << "qbg: Error: " << err.what() << endl;
+    return 1;
+  } catch(...) {
+    cerr << "qbg: Error: " << endl;
+    return 1;
+  }
+  return 0;
 }
 
 
diff --git a/lib/NGT/Capi.cpp b/lib/NGT/Capi.cpp
index 4e1f9ae..c7a6960 100644
--- a/lib/NGT/Capi.cpp
+++ b/lib/NGT/Capi.cpp
@@ -331,6 +331,18 @@ bool ngt_set_property_object_type_integer(NGTProperty prop, NGTError error) {
   return true;
 }
 
+bool ngt_set_property_object_type_qsint8(NGTProperty prop, NGTError error) {  // Sets objectType of the property behind `prop` to ObjectType::Qsuint8; returns true on success, false when prop is NULL.
+  if(prop == NULL){  // A NULL handle cannot be dereferenced: describe the problem through `error` and fail.
+    std::stringstream ss;
+    ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop;
+    operate_error_string_(ss, error);
+    return false;
+  }
+  // NOTE(review): the function name says qsint8 but the value stored is ObjectType::Qsuint8 -- confirm this is the intended enumerator.
+  (*static_cast<NGT::Property*>(prop)).objectType = NGT::ObjectSpace::ObjectType::Qsuint8;
+  return true;
+}
+
 bool ngt_set_property_distance_type(NGTProperty prop, NGT::Index::Property::DistanceType type, NGTError error) {
   if(prop == NULL){
     std::stringstream ss;
@@ -428,11 +440,7 @@ NGTPropertyInfo ngt_get_property_info(NGTIndex index, NGTError error) {
     prop.prefetchSize,
     prop.accuracyTable.c_str(),
     prop.searchType.c_str(),
-#ifdef NGT_INNER_PRODUCT
     prop.maxMagnitude,
-#else
-    -1,
-#endif
     prop.nOfNeighborsForInsertionOrder,
     prop.epsilonForInsertionOrder,
 #ifdef NGT_REFINEMENT
@@ -1055,6 +1063,47 @@ ObjectID ngt_append_index_as_float16(NGTIndex index, NGTFloat16 *obj, uint32_t o
   }
 }
 
+ObjectID ngt_insert_to_refinement_as_float(NGTIndex index, float *obj, uint32_t obj_dim, NGTError error) {  // Copies obj[0..obj_dim) and returns the result of NGT::Index::insertToRefinement; returns 0 on any failure.
+  if(index == NULL || obj == NULL || obj_dim == 0){  // Reject a missing handle, a missing buffer or a zero-length vector before touching the index.
+    std::stringstream ss;
+    ss << "Capi : " << __FUNCTION__ << "() : parametor error: index = " << index << " obj = " << obj << " obj_dim = " << obj_dim;
+    operate_error_string_(ss, error);
+    return 0;
+  }
+
+  try{
+    NGT::Index* pindex = static_cast<NGT::Index*>(index);
+    std::vector<float> vobj(&obj[0], &obj[obj_dim]);  // Copies exactly obj_dim floats; &obj[obj_dim] is only used as the one-past-the-end bound.
+    return pindex->insertToRefinement(vobj);
+  }catch(std::exception &err) {  // Exceptions do not cross this function: they are turned into an error string and a 0 return.
+    std::stringstream ss;
+    ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what();
+    operate_error_string_(ss, error);
+    return 0;
+  }
+}
+
+ObjectID ngt_append_to_refinement_as_float(NGTIndex index, float *obj, uint32_t obj_dim, NGTError error) {  // Copies obj[0..obj_dim) and returns the result of NGT::Index::appendToRefinement; returns 0 on any failure.
+  if(index == NULL || obj == NULL || obj_dim == 0){  // Reject a missing handle, a missing buffer or a zero-length vector before touching the index.
+    std::stringstream ss;
+    ss << "Capi : " << __FUNCTION__ << "() : parametor error: index = " << index << " obj = " << obj << " obj_dim = " << obj_dim;
+    operate_error_string_(ss, error);
+    return 0;
+  }
+
+  try{
+    NGT::Index* pindex = static_cast<NGT::Index*>(index);
+    std::vector<float> vobj(&obj[0], &obj[obj_dim]);  // Copies exactly obj_dim floats; &obj[obj_dim] is only used as the one-past-the-end bound.
+    return pindex->appendToRefinement(vobj);
+  }catch(std::exception &err) {  // Exceptions do not cross this function: they are turned into an error string and a 0 return.
+    std::stringstream ss;
+    ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what();
+    operate_error_string_(ss, error);
+    return 0;
+  }
+}
+
+
 bool ngt_batch_append_index(NGTIndex index, float *obj, uint32_t data_count, NGTError error) {
   try{
     NGT::Index* pindex = static_cast<NGT::Index*>(index);
diff --git a/lib/NGT/Capi.h b/lib/NGT/Capi.h
index b4aabae..4f08df3 100644
--- a/lib/NGT/Capi.h
+++ b/lib/NGT/Capi.h
@@ -196,6 +196,8 @@ bool ngt_set_property_object_type_float16(NGTProperty, NGTError);
 
 bool ngt_set_property_object_type_integer(NGTProperty, NGTError);
 
+bool ngt_set_property_object_type_qsint8(NGTProperty, NGTError);
+
 bool ngt_set_property_distance_type_l1(NGTProperty, NGTError);
 
 bool ngt_set_property_distance_type_l2(NGTProperty, NGTError);
@@ -272,6 +274,10 @@ ObjectID ngt_insert_index_as_float16(NGTIndex, NGTFloat16*, uint32_t, NGTError);
 
 ObjectID ngt_append_index_as_float16(NGTIndex, NGTFloat16*, uint32_t, NGTError);
 
+ObjectID ngt_append_to_refinement_as_float(NGTIndex, float*, uint32_t, NGTError);
+
+ObjectID ngt_insert_to_refinement_as_float(NGTIndex, float*, uint32_t, NGTError);
+
 bool ngt_batch_append_index(NGTIndex, float*, uint32_t, NGTError);
 
 bool ngt_batch_insert_index(NGTIndex, float*, uint32_t, uint32_t *, NGTError);
diff --git a/lib/NGT/Clustering.h b/lib/NGT/Clustering.h
index 046d526..bfa650b 100644
--- a/lib/NGT/Clustering.h
+++ b/lib/NGT/Clustering.h
@@ -125,7 +125,7 @@ namespace NGT {
     }
 
     static void
-      loadVectors(const std::string &file, std::vector<std::vector<float> > &vectors)
+      loadVectors(const std::string &file, std::vector<std::vector<float>> &vectors)
     {
       std::ifstream is(file);
       if (!is) {
@@ -152,7 +152,7 @@ namespace NGT {
     }
 
     static void
-      saveVectors(const std::string &file, std::vector<std::vector<float> > &vectors)
+      saveVectors(const std::string &file, std::vector<std::vector<float>> &vectors)
     {
       std::ofstream os(file);
       for (auto vit = vectors.begin(); vit != vectors.end(); ++vit) {
@@ -167,6 +167,22 @@ namespace NGT {
       }
     }
 
+    static void  // Writes every inner vector of `vectors` to `file` as one line of tab-separated unsigned integers.
+      saveVectors(const std::string &file, std::vector<std::vector<uint32_t>> &vectors)
+    {
+      std::ofstream os(file);  // NOTE(review): the stream state is not checked after opening -- confirm that a failed open may be ignored here.
+      for (auto vit = vectors.begin(); vit != vectors.end(); ++vit) {
+	std::vector<uint32_t> &v = *vit;
+	for (auto it = v.begin(); it != v.end(); ++it) {
+	  os << (*it);
+	  if (it + 1 != v.end()) {  // The separator is written between values only, so a line never ends with a tab.
+	    os << "\t";
+	  }
+	}
+	os << std::endl;  // One output line per inner vector (an empty inner vector yields an empty line); std::endl also flushes.
+      }
+    }
+
     static void
       loadVector(const std::string &file, std::vector<size_t> &vectors)
     {
@@ -403,7 +419,7 @@ namespace NGT {
 	}
       }
 
-      std::vector<Entry> sortedObjects(vectors.size());
+      std::vector<Entry> sortedObjects(vectors.size());	
 #pragma omp parallel for
       for (size_t vi = 0; vi < vectors.size(); vi++) {
 	auto vit = vectors.begin() + vi;
diff --git a/lib/NGT/Command.cpp b/lib/NGT/Command.cpp
index b057edc..1d1b75f 100644
--- a/lib/NGT/Command.cpp
+++ b/lib/NGT/Command.cpp
@@ -24,8 +24,16 @@
 
 using namespace std;
 
+#define NGT_APPENDING_BINARY
+#ifdef NGT_APPENDING_BINARY
+#include	"NGT/ArrayFile.h"
+#include	"NGT/ObjectSpace.h"
+#include	"NGTQ/ObjectFile.h"
+#endif
+
 
   NGT::Command::CreateParameters::CreateParameters(Args &args) {
+    args.parse("v");
     try {
       index = args.get("#1");
     } catch (...) {
@@ -104,7 +112,7 @@ using namespace std;
     case '-': property.seedType = NGT::Property::SeedType::SeedTypeNone; break;
     }
 
-    char objectType = args.getChar("o", 'f');
+    auto objectType = args.getString("o", "f");
     char distanceType = args.getChar("D", '2');
 #ifdef NGT_REFINEMENT
     char refinementObjectType = args.getChar("R", 'f');
@@ -113,24 +121,21 @@ using namespace std;
     numOfObjects = args.getl("n", 0);
     indexType = args.getChar("i", 't');
 
-    switch (objectType) {
-    case 'f':
+    if (objectType == "f") {
       property.objectType = NGT::Index::Property::ObjectType::Float;
-      break;
-    case 'c':
+    } else if (objectType == "c") {
       property.objectType = NGT::Index::Property::ObjectType::Uint8;
-      break;
 #ifdef NGT_HALF_FLOAT
-    case 'h':
+    } else if (objectType == "h") {
       property.objectType = NGT::Index::Property::ObjectType::Float16;
-      break;
 #endif
+    } else if (objectType == "s8" || objectType == "sqsu8") {
+      property.objectType = NGT::Index::Property::ObjectType::Qsuint8;
 #ifdef NGT_BFLOAT
-    case 'H':
+    } else if (objectType == "H") {
       property.objectType = NGT::Index::Property::ObjectType::Bfloat16;
-      break;
 #endif
-    default:
+    } else {
       std::stringstream msg;
       msg << "Command::CreateParameter: Error: Invalid object type. " << objectType;
       NGTThrowException(msg);
@@ -193,11 +198,9 @@ using namespace std;
     case 'E':
       property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeNormalizedL2;
       break;
-#ifdef NGT_INNER_PRODUCT
     case 'i':
       property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeInnerProduct;
       break;
-#endif
     case 'p':  // added by Nyapicom
       property.distanceType = NGT::Index::Property::DistanceType::DistanceTypePoincare;
       break;
@@ -219,6 +222,8 @@ using namespace std;
     }
 #endif
 
+    property.clippingRate = args.getf("c", 0.0);
+
     {
       string str = args.getString("l", "-");
       if (str != "-") {
@@ -257,6 +262,7 @@ using namespace std;
       "[-N maximum-#-of-inserted-objects] "
 #endif
       "[-l #-of-neighbors-for-insertion-order[:epsilon-for-insertion-order]] "
+      "[-c scalar-quantization-clipping-rate] "
       "index(output) [data.tsv(input)]";
 
     try {
@@ -283,108 +289,49 @@ using namespace std;
 	break;
       }
     } catch(NGT::Exception &err) {
-      std::cerr << err.what() << std::endl;
-      cerr << usage << endl;
+      std::stringstream msg;
+      msg << err.what() << std::endl;
+      msg << usage;
+      NGTThrowException(msg);
     }
   }
 
 
-  void appendTextVectors(NGT::Index &index, const std::string &data, size_t dataSize, char destination) {
-    NGT::Property prop;
-    index.getProperty(prop);
-
-    size_t id = index.getObjectRepositorySize();
-    vector<pair<NGT::Object*, size_t>> objects;
-    NGT::Timer timer;
-    timer.start();
-    ifstream is(data);
-    if (!is) {
-      cerr << "Cannot open the specified data file. " << data << endl;
-      return;
-    }
-    std::string line;
-    size_t counter = 0;
-    float maxMag = 0.0;
-    while (getline(is, line)) {
-      if (is.eof()) break;
-      if (dataSize > 0 && counter > dataSize) break;
-      vector<float> object;
-      vector<string> tokens;
-      NGT::Common::tokenize(line, tokens, "\t, ");
-      for (auto &v : tokens) object.push_back(NGT::Common::strtod(v));
-#ifdef NGT_INNER_PRODUCT
-      if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) {
-	double mag = 0.0;
-	for (auto &v : object) {
-	  mag += v * v;
-	}
-	if (mag > maxMag) {
-	  maxMag = mag;
-	}
-	//object.emplace_back(sqrt(maxMag - mag));
-	object.emplace_back(mag);
-      }
-#endif
-#ifdef NGT_REFINEMENT
-      if (destination == 'r') {
-	index.appendToRefinement(object);
-      } else {
-	index.append(object);
-      }
-#else
-      index.append(object);
-#endif
-      counter++;
-      id++;
-      if (counter % 1000000 == 0) {
-	timer.stop();
-	std::cerr << "appended " << static_cast<float>(counter) / 1000000.0 << "M objects.";
-	if (counter != id) {
-	  std::cerr << " # of the total objects=" << static_cast<float>(id) / 1000000.0 << "M";
-	}
-	cerr << " peak vm size=" << NGT::Common::getProcessVmPeakStr()
-	     << " time=" << timer << std::endl;
-	timer.restart();
-      }
-    }
-#ifdef NGT_INNER_PRODUCT
-    if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) {
-      NGT::ObjectSpace *rep = 0;
-#ifdef NGT_REFINEMENT
-      if (destination == 'r') {
-	rep = &index.getRefinementObjectSpace();
+  void appendTextVectors(std::string &indexPath, std::string &data, size_t dataSize, char appendMode,   // Opens the index at indexPath, appends objects from the text file `data`, optionally calls createIndex, then saves and closes.
+			 std::string &destination, size_t ioSearchSize, float ioEpsilon) {  // destination is a flag string ('n', 'r'); ioSearchSize/ioEpsilon configure the insertion-order build.
+    NGT::StdOstreamRedirector redirector(false);
+    redirector.begin();
+    NGT::Index index(indexPath);
+    index.enableLog();
+    auto append = destination.find('n') == std::string::npos;  // true unless destination contains 'n'
+    auto refinement = destination.find('r') != std::string::npos;  // true when destination contains 'r'
+    index.appendFromTextObjectFile(data, dataSize, append, refinement);
+  // Only mode 't' goes on to build the index below; any other mode reaching this function just appends and saves.
+    if (appendMode == 't') {
+      if (ioSearchSize > 0) {  // A positive search size selects the insertion-order based build.
+	NGT::Index::InsertionOrder insertionOrder;
+	insertionOrder.nOfNeighboringNodes = ioSearchSize;
+	insertionOrder.epsilon = ioEpsilon;
+	std::cerr << "append: insertion order optimization is enabled. "
+		  << ioSearchSize << ":" << ioEpsilon << std::endl;
+	index.extractInsertionOrder(insertionOrder);
+	index.createIndexWithInsertionOrder(insertionOrder);
       } else {
-	rep = &index.getObjectSpace();
-      }
-#else
-      rep = &index.getObjectSpace();
-#endif
-      for (size_t idx = 1; idx < rep->getRepository().size(); idx++) {
-	std::vector<float> object;
-	rep->getObject(idx, object);
-	//object.emplace_back(sqrt(maxMag - mag));
-	object.back() = sqrt(maxMag - object.back());
-#ifdef NGT_REFINEMENT
-	if (destination == 'r') {
-	  index.updateToRefinement(idx, object);
-	} else {
-	  index.update(idx, object);
-	}
-#else
-	index.update(idx, object);
-#endif
+	index.createIndex();
       }
     }
-#endif
+    index.save();
+    index.close();
+    redirector.end();  // NOTE(review): not reached if anything above throws -- confirm the redirector needs no explicit end() in that case.
   }
 
-  void appendTextVectors(std::string &indexPath, std::string &data, size_t dataSize, char appendMode, char destination, size_t ioSearchSize, float ioEpsilon, float cutRate) {
+  void appendRefinementVectors(std::string &indexPath, char appendMode, size_t ioSearchSize, float ioEpsilon) {
     NGT::StdOstreamRedirector redirector(false);
     redirector.begin();
     NGT::Index index(indexPath);
     index.enableLog();
-    appendTextVectors(index, data, dataSize, destination);
-    if (appendMode == 't') {
+    index.appendFromRefinementObjectFile();
+    if (appendMode == 'r') {
       if (ioSearchSize > 0) {
 	NGT::Index::InsertionOrder insertionOrder;
 	insertionOrder.nOfNeighboringNodes = ioSearchSize;
@@ -402,6 +349,68 @@ using namespace std;
     redirector.end();
   }
 
+  void appendTextVectorsInMemory(std::string &indexPath, std::string &data, size_t dataSize, char appendMode,   // Reads the text file `data` into one flat float buffer and appends it to the index at indexPath with a single append call.
+				 size_t ioSearchSize, float ioEpsilon) {  // dataSize > 0 caps the number of lines read (0 = whole file); mode 'm' also builds the index; throws if `data` cannot be opened.
+    NGT::Index index(indexPath);
+    index.enableLog();
+    {
+      ifstream is(data);
+      if (!is) {
+	std::stringstream msg;
+	msg << "Cannot open the specified data file. " << data;
+	NGTThrowException(msg);
+      }
+      std::string line;
+      size_t counter = 0;  // number of lines (objects) collected so far
+      std::vector<float> objects;  // all values of all objects, concatenated in file order
+      while (getline(is, line)) {  // No eof() test here: a final line without a trailing newline is still a valid record.
+	if (dataSize > 0 && counter >= dataSize) break;  // Honor the requested object limit instead of ignoring dataSize.
+	vector<string> tokens;
+	NGT::Common::tokenize(line, tokens, "\t, ");
+	for (auto &v : tokens) objects.emplace_back(NGT::Common::strtod(v));
+	counter++;
+      }
+      index.append(objects.data(), counter);
+      index.save();
+    }
+    if (appendMode == 'm') {  // Only mode 'm' builds the index; any other mode reaching this function just appends and saves.
+      if (ioSearchSize > 0) {  // A positive search size selects the insertion-order based build.
+	NGT::Index::InsertionOrder insertionOrder;
+	insertionOrder.nOfNeighboringNodes = ioSearchSize;
+	insertionOrder.epsilon = ioEpsilon;
+	std::cerr << "append: insertion order optimization is enabled. "
+		  << ioSearchSize << ":" << ioEpsilon << std::endl;
+	index.extractInsertionOrder(insertionOrder);
+	index.createIndexWithInsertionOrder(insertionOrder);
+      } else {
+	index.createIndex();
+      }
+    }
+    index.save();
+    index.close();
+  }
+
+#ifdef NGT_APPENDING_BINARY
+
+void appendBinaryVectors(std::string &indexPath, std::string &data, size_t dataSize, char appendMode, std::string &destination) {  // Opens the index at indexPath, appends objects from the binary file `data`, optionally calls createIndex, then saves and closes.
+    NGT::StdOstreamRedirector redirector(false);
+    redirector.begin();
+    NGT::Index index(indexPath);
+    index.enableLog();
+    // destination is a flag string: the append flag is true unless it contains 'n', and the
+    // refinement flag is true when it contains 'r'. Both are forwarded unchanged below.
+    auto append = destination.find('n') == std::string::npos;
+    auto refinement = destination.find('r') != std::string::npos;
+    index.appendFromBinaryObjectFile(data, dataSize, append, refinement);
+
+    if (appendMode == 'b') {  // Only mode 'b' builds the index; any other mode reaching this function just appends and saves.
+      index.createIndex(32);  // NOTE(review): 32 is presumably the thread count handed to createIndex -- confirm.
+    }
+    index.save();
+    index.close();
+    redirector.end();
+  }
+#endif
 
   void
   NGT::Command::append(Args &args)
@@ -413,9 +422,10 @@ using namespace std;
     try {
       indexPath = args.get("#1");
     } catch (...) {
-      cerr << "ngt: Error: DB is not specified." << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "Index is not specified." << endl;
+      msg << usage << endl;
+      NGTThrowException(msg);
     }
     string data;
     try {
@@ -430,7 +440,6 @@ using namespace std;
 
     size_t ioSearchSize = args.getl("S", 0);
     float ioEpsilon = args.getf("E", 0.1);
-    float cutRate = args.getf("c", 0.02);
     
     if (debugLevel >= 1) {
       cerr << "thread size=" << threadSize << endl;
@@ -438,21 +447,34 @@ using namespace std;
     }
 
 
-    char appendMode = args.getChar("m", '-');
-    char destination = args.getChar("D", '-');
-    if (appendMode == '-') {
+    char appendMode = args.getChar("m", 't');
+    auto destination = args.getString("D", "o");
+    if (appendMode == 'n') {
       try {
         NGT::Index::append(indexPath, data, threadSize, dataSize);
       } catch (NGT::Exception &err) {
-        cerr << "ngt: Error. " << err.what() << endl;
-        cerr << usage << endl;
+	std::stringstream msg;
+	msg << err.what() << std::endl;
+	msg << usage;
+	NGTThrowException(msg);
       } catch (...) {
-        cerr << "ngt: Error" << endl;
-        cerr << usage << endl;
+	std::stringstream msg;
+	msg << usage;
+	NGTThrowException(msg);
       }
     } else if (appendMode == 't' || appendMode == 'T') {
-      appendTextVectors(indexPath, data, dataSize, appendMode, destination, ioSearchSize, ioEpsilon, cutRate);
+      appendTextVectors(indexPath, data, dataSize, appendMode, destination, ioSearchSize, ioEpsilon);
+    } else if (appendMode == 'r' || appendMode == 'R') {
+      appendRefinementVectors(indexPath, appendMode, ioSearchSize, ioEpsilon);
+    } else if (appendMode == 'm' || appendMode == 'M') {
+      appendTextVectorsInMemory(indexPath, data, dataSize, appendMode, ioSearchSize, ioEpsilon);
+#ifdef NGT_APPENDING_BINARY
+    } else if (appendMode == 'b' || appendMode == 'B') {
+      appendBinaryVectors(indexPath, data, dataSize, appendMode, destination);
+    }
+#else
     }
+#endif
   }
 
   void
@@ -550,8 +572,11 @@ using namespace std;
 	  stream << "Rank\tID\tDistance" << endl;
 	}
 	for (size_t i = 0; i < objects.size(); i++) {
-	  stream << i + 1 << "\t" << objects[i].id << "\t";
-	  stream << objects[i].distance << endl;
+	  if (searchParameters.outputMode == "e-") {
+	    stream << i + 1 << "\t" << objects[i].id << "\t" << 0.0 << std::endl;
+	  } else {
+	    stream << i + 1 << "\t" << objects[i].id << "\t" << objects[i].distance << std::endl;
+	  }
 	}
 	if (searchParameters.outputMode[0] == 'e') {
 	  stream << "# End of Search" << endl;
@@ -625,9 +650,10 @@ using namespace std;
     try {
       database = args.get("#1");
     } catch (...) {
-      cerr << "ngt: Error: DB is not specified" << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "Index is not specified" << endl;
+      msg << usage;
+      NGTThrowException(msg);
     }
 
     SearchParameters searchParameters(args);
@@ -647,11 +673,14 @@ using namespace std;
 	cerr << "Peak VM size=" << NGT::Common::getProcessVmPeakStr() << std::endl;
       }
     } catch (NGT::Exception &err) {
-      cerr << "ngt: Error. " << err.what() << endl;
-      cerr << usage << endl;
+      std::stringstream msg;
+      msg << err.what() << std::endl;
+      msg << usage;
+      NGTThrowException(msg);
     } catch (...) {
-      cerr << "ngt: Error" << endl;
-      cerr << usage << endl;
+      std::stringstream msg;
+      msg << usage;
+      NGTThrowException(msg);
     }
 
   }
@@ -665,16 +694,18 @@ using namespace std;
     try {
       database = args.get("#1");
     } catch (...) {
-      cerr << "ngt: Error: DB is not specified" << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "Index is not specified" << endl;
+      msg << usage;
+      NGTThrowException(msg);
     }
     try {
       args.get("#2");
     } catch (...) {
-      cerr << "ngt: Error: ID is not specified" << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "ID is not specified" << endl;
+      msg << usage;
+      NGTThrowException(msg);
     }
     char dataType = args.getChar("d", 'f');
     char mode = args.getChar("m", '-');
@@ -693,15 +724,15 @@ using namespace std;
 	try {
 	  ids = args.get("#2");
 	} catch (...) {
-	  cerr << "ngt: Error: Data file is not specified" << endl;
-	  cerr << usage << endl;
-	  return;
+	  std::stringstream msg;
+	  msg << "Data file is not specified" << endl;
+	  NGTThrowException(msg);
 	}
 	ifstream is(ids);
 	if (!is) {
-	  cerr << "ngt: Error: Cannot open the specified file. " << ids << endl;
-	  cerr << usage << endl;
-	  return;
+	  std::stringstream msg;
+	  msg << "Cannot open the specified file. " << ids << endl;
+	  NGTThrowException(msg);
 	}
 	string line;
 	int count = 0;
@@ -732,11 +763,14 @@ using namespace std;
       }
       NGT::Index::remove(database, objects, force);
     } catch (NGT::Exception &err) {
-      cerr << "ngt: Error. " << err.what() << endl;
-      cerr << usage << endl;
+      std::stringstream msg;
+      msg << err.what() << std::endl;
+      msg << usage;
+      NGTThrowException(msg);
     } catch (...) {
-      cerr << "ngt: Error" << endl;
-      cerr << usage << endl;
+      std::stringstream msg;
+      msg << usage;
+      NGTThrowException(msg);
     }
   }
 
@@ -748,26 +782,31 @@ using namespace std;
     try {
       database = args.get("#1");
     } catch (...) {
-      cerr << "ngt: Error: DB is not specified" << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "Index is not specified." << endl;
+      msg << usage;
+      NGTThrowException(msg);
     }
     string exportFile;
     try {
       exportFile = args.get("#2");
     } catch (...) {
-      cerr << "ngt: Error: ID is not specified" << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "ID is not specified" << endl;
+      msg << usage;
+      NGTThrowException(msg);
     }
     try {
       NGT::Index::exportIndex(database, exportFile);
     } catch (NGT::Exception &err) {
-      cerr << "ngt: Error. " << err.what() << endl;
-      cerr << usage << endl;
+      std::stringstream msg;
+      msg << err.what() << std::endl;
+      msg << usage;
+      NGTThrowException(msg);
     } catch (...) {
-      cerr << "ngt: Error" << endl;
-      cerr << usage << endl;
+      std::stringstream msg;
+      msg << usage;
+      NGTThrowException(msg);
     }
   }
 
@@ -779,26 +818,27 @@ using namespace std;
     try {
       database = args.get("#1");
     } catch (...) {
-      cerr << "ngt: Error: DB is not specified" << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "Index is not specified" << endl;
+      msg << usage;
+      NGTThrowException(msg);
     }
     string importFile;
     try {
       importFile = args.get("#2");
     } catch (...) {
-      cerr << "ngt: Error: ID is not specified" << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "ID is not specified" << endl;
+      msg << usage;
+      NGTThrowException(msg);
     }
 
     try {
       NGT::Index::importIndex(database, importFile);
     } catch (NGT::Exception &err) {
-      cerr << "ngt: Error. " << err.what() << endl;
+      cerr << err.what() << endl;
       cerr << usage << endl;
     } catch (...) {
-      cerr << "ngt: Error" << endl;
       cerr << usage << endl;
     }
 
@@ -812,9 +852,10 @@ using namespace std;
     try {
       indexName = args.get("#1");
     } catch (...) {
-      cerr << "Index is not specified" << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "Index is not specified" << endl;
+      msg << usage << endl;
+      NGTThrowException(msg);
     }
 
     // the number of forcedly pruned edges
@@ -826,15 +867,17 @@ using namespace std;
     cerr << "selectively pruned edge size=" << selectivelyPrunedEdgeSize << endl;
 
     if (selectivelyPrunedEdgeSize == 0 && forcedlyPrunedEdgeSize == 0) {
-      cerr << "prune: Error! Either of selective edge size or remaining edge size should be specified." << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "prune: Error! Either of selective edge size or remaining edge size should be specified." << endl;
+      msg << usage << endl;
+      NGTThrowException(msg);
     }
 
     if (forcedlyPrunedEdgeSize != 0 && selectivelyPrunedEdgeSize != 0 && selectivelyPrunedEdgeSize >= forcedlyPrunedEdgeSize) {
-      cerr << "prune: Error! selective edge size is less than remaining edge size." << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg  << "prune: Error! selective edge size is less than remaining edge size." << endl;
+      msg << usage << endl;
+      NGTThrowException(msg);
     }
 
     NGT::Index	index(indexName);
@@ -923,17 +966,19 @@ using namespace std;
     try {
       inIndexPath = args.get("#1");
     } catch (...) {
-      cerr << "ngt::reconstructGraph: Input index is not specified." << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "ngt::reconstructGraph: Input index is not specified." << endl;
+      msg << usage << endl;
+      NGTThrowException(msg);
     }
     string outIndexPath;
     try {
       outIndexPath = args.get("#2");
     } catch (...) {
-      cerr << "ngt::reconstructGraph: Output index is not specified." << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "ngt::reconstructGraph: Output index is not specified." << endl;
+      msg << usage << endl;
+      NGTThrowException(msg);
     }
 
     char mode = args.getChar("m", 'S');
@@ -944,6 +989,7 @@ using namespace std;
     double margin = args.getf("M", 0.2);
     char smode = args.getChar("s", '-');
     bool verbose = args.getBool("v");
+    char graphConversion = args.getChar("C", '-');
 
     // the number (rank) of original edges
     int numOfOutgoingEdges	= args.getl("o", -1);
@@ -972,6 +1018,7 @@ using namespace std;
 #else
     graphOptimizer.shortcutReductionRange = args.getf("R", 18.0);
 #endif
+    graphOptimizer.undirectedGraphConversion = graphConversion == '-' ? false : true;
     graphOptimizer.logDisabled = !verbose;
     
     graphOptimizer.set(numOfOutgoingEdges, numOfIncomingEdges, nOfQueries, nOfResults);
@@ -993,9 +1040,10 @@ using namespace std;
     try {
       indexPath = args.get("#1");
     } catch (...) {
-      cerr << "Index is not specified" << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "Index is not specified" << endl;
+      msg << usage << endl;
+      NGTThrowException(msg);
     }
 
     char mode = args.getChar("m", '-');
@@ -1018,7 +1066,7 @@ using namespace std;
 
       std::cout << "Successfully completed." << std::endl;
     } catch (NGT::Exception &err) {
-      cerr << "ngt: Error. " << err.what() << endl;
+      cerr << err.what() << endl;
       cerr << usage << endl;
     }
 
@@ -1037,18 +1085,20 @@ using namespace std;
     try {
       inIndexPath = args.get("#1");
     } catch (...) {
-      cerr << "Input index is not specified" << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "Input index is not specified" << endl;
+      msg << usage << endl;
+      NGTThrowException(msg);
     }
 
     string outIndexPath;
     try {
       outIndexPath = args.get("#2");
     } catch (...) {
-      cerr << "Output index is not specified" << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "Output index is not specified" << endl;
+      msg << usage << endl;
+      NGTThrowException(msg);
     }
 
     NGT::Index	index(inIndexPath);
@@ -1062,8 +1112,10 @@ using namespace std;
     try {
       GraphReconstructor::refineANNG(index, epsilon, expectedAccuracy, noOfEdges, exploreEdgeSize, batchSize);
     } catch (NGT::Exception &err) {
-      std::cerr << "Error!! Cannot refine the index. " << err.what() << std::endl;
-      return;
+      std::stringstream msg;
+      msg << "Error!! Cannot refine the index. " << err.what() << std::endl;
+      msg << usage << endl;
+      NGTThrowException(msg);
     }
     index.saveIndex(outIndexPath);
 #endif
@@ -1082,9 +1134,10 @@ using namespace std;
     try {
       indexPath = args.get("#1");
     } catch (...) {
-      cerr << "Index is not specified" << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "Index is not specified." << endl;
+      msg << usage << endl;
+      NGTThrowException(msg);
     }
     
     char mode = args.getChar("m", 'c');
@@ -1099,9 +1152,10 @@ using namespace std;
       const string com = "cp -r " + indexPath + " " + path;
       int stat = system(com.c_str());
       if (stat != 0) {
-	std::cerr << "ngt::repair: Cannot create the specified index. " << path << std::endl;
-	cerr << usage << endl;
-	return;
+	std::stringstream msg;
+	msg << "ngt::repair: Cannot create the specified index. " << path << std::endl;
+	msg << usage;
+	NGTThrowException(msg);
       }
     }
 
@@ -1240,9 +1294,10 @@ using namespace std;
 	std::cerr << "Saving index." << std::endl;
 	index.saveIndex(path);
       } catch (NGT::Exception &err) {
-	cerr << "ngt: Error. " << err.what() << endl;
-	cerr << usage << endl;
-	return;
+	std::stringstream msg;
+	msg << err.what() << endl;
+	msg << usage;
+	NGTThrowException(msg);
       }
     }
   }
@@ -1259,9 +1314,10 @@ using namespace std;
     try {
       indexPath = args.get("#1");
     } catch (...) {
-      cerr << "Index is not specified" << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "Index is not specified." << endl;
+      msg << usage;
+      NGTThrowException(msg);
     }
 
     GraphOptimizer::ANNGEdgeOptimizationParameter parameter;
@@ -1295,9 +1351,10 @@ using namespace std;
     try {
       database = args.get("#1");
     } catch (...) {
-      cerr << "ngt: Error: DB is not specified" << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "Index is not specified" << endl;
+      msg << usage;
+      NGTThrowException(msg);
     }
 
     size_t edgeSize = args.getl("E", UINT_MAX);
@@ -1319,7 +1376,6 @@ using namespace std;
       cerr << "ngt: NGT Error. " << err.what() << endl;
       cerr << usage << endl;
     } catch (...) {
-      cerr << "ngt: Error" << endl;
       cerr << usage << endl;
     }
   }
@@ -1327,7 +1383,7 @@ using namespace std;
 
   void NGT::Command::exportGraph(Args &args) {
 #if defined(NGT_SHARED_MEMORY_ALLOCATOR)
-    std::cerr << "ngt: Error: exportGraph is not implemented." << std::endl;
+    std::cerr << "exportGraph is not implemented." << std::endl;
     abort();
 #else
     std::string usage = "ngt export-graph [-k #-of-edges] index";
@@ -1335,9 +1391,10 @@ using namespace std;
     try {
       indexPath = args.get("#1");
     } catch (...) {
-      cerr << "ngt::exportGraph: Index is not specified." << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "Index is not specified" << endl;
+      msg << usage;
+      NGTThrowException(msg);
     }
 
     int k = args.getl("k", 0);
@@ -1371,7 +1428,7 @@ using namespace std;
 
   void NGT::Command::exportObjects(Args &args) {
 #if defined(NGT_SHARED_MEMORY_ALLOCATOR)
-    std::cerr << "ngt: Error: exportObjects is not implemented." << std::endl;
+    std::cerr << "exportObjects is not implemented." << std::endl;
     abort();
 #else
     std::string usage = "ngt export-objects index";
@@ -1379,9 +1436,10 @@ using namespace std;
     try {
       indexPath = args.get("#1");
     } catch (...) {
-      cerr << "ngt::exportGraph: Index is not specified." << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "Index is not specified" << endl;
+      msg << usage;
+      NGTThrowException(msg);
     }
 
     NGT::Index		index(indexPath);
diff --git a/lib/NGT/Command.h b/lib/NGT/Command.h
index 0279adb..b2d90ef 100644
--- a/lib/NGT/Command.h
+++ b/lib/NGT/Command.h
@@ -119,8 +119,9 @@ class Command {
   {
     std::ifstream		is(searchParameters.query);
     if (!is) {
-      std::cerr << "Cannot open the specified file. " << searchParameters.query << std::endl;
-      return;
+      std::stringstream msg;
+      msg << "Cannot open the specified query file. " << searchParameters.query;
+      NGTThrowException(msg);
     }
     search(index, searchParameters, is, stream);
   }
diff --git a/lib/NGT/Common.h b/lib/NGT/Common.h
index e449aaf..e45fc14 100644
--- a/lib/NGT/Common.h
+++ b/lib/NGT/Common.h
@@ -32,6 +32,7 @@
 #include	<iomanip>
 #include	<algorithm>
 #include	<typeinfo>
+#include	<limits>
 
 #include	<sys/time.h>
 #include	<fcntl.h>
@@ -56,6 +57,23 @@ namespace NGT {
   typedef	half_float::half	float16;
 #endif
 
+  class quint8 {  // value wrapper that makes an 8-bit unsigned scalar a type distinct from uint8_t
+  public:
+    quint8(uint8_t v):value(v){}  // non-explicit: a raw uint8_t converts to quint8 implicitly
+    quint8 &operator=(uint8_t v) { value = v; return *this; }
+    operator uint8_t() const { return value; }
+    uint8_t get() const { return value; }  // const-qualified so it can be called on const objects
+    uint8_t value;
+  };
+  class qsint8 {  // value wrapper that makes an 8-bit signed scalar a type distinct from int8_t
+  public:
+    qsint8(int8_t v):value(v){}  // non-explicit: a raw int8_t converts to qsint8 implicitly
+    qsint8 &operator=(int8_t v) { value = v; return *this; }
+    operator int8_t() const { return value; }
+    int8_t get() const { return value; }  // const-qualified so it can be called on const objects
+    int8_t value;
+  };
+
 #ifdef NGT_BFLOAT
   class bfloat16 {
   public:
@@ -1201,6 +1219,7 @@ namespace NGT {
 	vectorSize--;
       }
     }
+
     iterator insert(iterator &i, const TYPE &data, SharedMemoryAllocator &allocator) {
       if (size() == 0) {
 	push_back(data, allocator);
@@ -1766,7 +1785,6 @@ namespace NGT {
       removedList->pop_back();
       return idx;
     }
-
     void removedListPush(size_t id) {
       if (removedList->size() == 0) {
 	removedList->push_back(id, allocator);
@@ -1780,6 +1798,16 @@ namespace NGT {
       }
       removedList->insert(rmi, id, allocator);
     }
+    void removedListRemove(size_t id) {  // erase `id` from the removed list if it is there
+      if (removedList->size() != 0) {
+        Vector<size_t>::iterator hit
+          = std::lower_bound(removedList->begin(allocator), removedList->end(allocator), id, std::greater<size_t>());
+        const bool present = (hit != removedList->end(allocator)) && ((*hit) == id);
+        if (present) {
+          removedList->erase(hit, allocator);
+        }
+      }
+    }
 #else
     void *construct() {
       SharedMemoryAllocator &allocator = getAllocator();
@@ -1812,6 +1840,14 @@ namespace NGT {
       return push(n);
     }
 
+    size_t insert(size_t idx, TYPE *n) {  // put n at the caller-chosen slot idx and return idx
+#ifdef ADVANCED_USE_REMOVED_LIST
+      removedListRemove(idx);  // idx is about to be filled, so drop it from the removed list if listed
+#endif
+      put(idx, n);
+      return idx;
+    }
+
     bool isEmpty(size_t idx) {
       if (idx < size()) {
 	return (*array).at(idx, allocator) == 0;
@@ -2055,6 +2091,29 @@ namespace NGT {
       return std::vector<TYPE*>::size() - 1;
     }
 
+#ifdef ADVANCED_USE_REMOVED_LIST
+    // Removes one occurrence of `id` from removedList; the list keeps its
+    // contents unchanged when `id` is absent.
+    // removedList is ordered with std::greater, so top() is its smallest entry:
+    // entries smaller than `id` are popped into a scratch vector, `id` is dropped
+    // if it is then on top, and the popped entries are pushed back.
+    // Unlike rebuilding the queue from a full copy, only the entries below `id`
+    // are touched, and nothing is copied when the list is empty.
+    void removedListRemove(size_t id) {
+      std::vector<size_t> smaller;
+      while (!removedList.empty() && removedList.top() < id) {
+        smaller.push_back(removedList.top());
+        removedList.pop();
+      }
+      if (!removedList.empty() && removedList.top() == id) {
+        removedList.pop();
+      }
+      for (size_t i = 0; i < smaller.size(); i++) {
+        removedList.push(smaller[i]);
+      }
+    }
+#endif
+
     size_t insert(TYPE *n) {
 #ifdef ADVANCED_USE_REMOVED_LIST
       if (!removedList.empty()) {
@@ -2067,6 +2126,14 @@ namespace NGT {
       return push(n);
     }
 
+    size_t insert(size_t idx, TYPE *n) {  // put n at the caller-chosen slot idx and return idx
+#ifdef ADVANCED_USE_REMOVED_LIST
+      removedListRemove(idx);  // idx is about to be filled, so drop it from the removed list if listed
+#endif
+      put(idx, n);
+      return idx;
+    }
+
     bool isEmpty(size_t idx) {
       if (idx < std::vector<TYPE*>::size()) {
 	return (*this)[idx] == 0;
@@ -2271,7 +2338,7 @@ namespace NGT {
 #ifdef ADVANCED_USE_REMOVED_LIST
     size_t count() { return std::vector<TYPE*>::size() == 0 ? 0 : std::vector<TYPE*>::size() - removedList.size() - 1; }
   protected:
-    std::priority_queue<size_t, std::vector<size_t>, std::greater<size_t> >	removedList;
+    std::priority_queue<size_t, std::vector<size_t>, std::greater<size_t>>	removedList;
 #endif
   };
 
@@ -2344,7 +2411,7 @@ namespace NGT {
     ObjectID		id;
   };
   
-  typedef std::priority_queue<ObjectDistance, std::vector<ObjectDistance>, std::less<ObjectDistance> > ResultPriorityQueue;
+  typedef std::priority_queue<ObjectDistance, std::vector<ObjectDistance>, std::less<ObjectDistance>> ResultPriorityQueue;
 
   class SearchContainer : public NGT::Container {
   public:
@@ -2365,6 +2432,7 @@ namespace NGT {
       useAllNodesInLeaf = sc.useAllNodesInLeaf;
       expectedAccuracy = sc.expectedAccuracy;
       visitCount = sc.visitCount;
+      insertion = sc.insertion;
       return *this;
     }
     virtual ~SearchContainer() {}
@@ -2376,6 +2444,7 @@ namespace NGT {
       edgeSize = -1;	// dynamically prune the edges during search. -1 means following the index property. 0 means using all edges.
       useAllNodesInLeaf = false;
       expectedAccuracy = -1.0;
+      insertion = false;
     }
     void setSize(size_t s) { size = s; }
     void setResults(ObjectDistances *r) { result = r; }
@@ -2385,6 +2454,7 @@ namespace NGT {
     void setExpectedAccuracy(float a) { expectedAccuracy = a; }
 
     inline bool resultIsAvailable() { return result != 0; }
+    float getEpsilon() { return explorationCoefficient - 1.0; }
     ObjectDistances &getResult() {
       if (result == 0) {
 	NGTThrowException("Inner error: results is not set");
@@ -2406,6 +2476,8 @@ namespace NGT {
     float		expectedAccuracy;
   private:
     ObjectDistances	*result;
+  public:
+    bool		insertion;
   };
 
 
@@ -2497,3 +2569,23 @@ namespace NGT {
 
 } // namespace NGT
 
+namespace std {
+  // numeric_limits for the NGT 8-bit wrappers: qsint8 spans [-128, 127], quint8 spans [0, 255].
+  template<>
+    class numeric_limits<NGT::qsint8> {
+  public:
+    static NGT::qsint8 max() { return NGT::qsint8(127); }
+    static NGT::qsint8 min() { return NGT::qsint8(-128); }
+    static NGT::qsint8 lowest() { return min(); }  // added so generic code calling lowest() compiles
+    static bool is_specialized() { return true; }  // NOTE(review): the standard member is a constant, not a function
+  };
+  template<>
+    class numeric_limits<NGT::quint8> {
+  public:
+    static NGT::quint8 max() { return NGT::quint8(255); }
+    static NGT::quint8 min() { return NGT::quint8(0); }
+    static NGT::quint8 lowest() { return min(); }  // added so generic code calling lowest() compiles
+    static bool is_specialized() { return true; }  // NOTE(review): the standard member is a constant, not a function
+  };
+}
+
diff --git a/lib/NGT/Graph.cpp b/lib/NGT/Graph.cpp
index 52d0dbb..44b9eeb 100644
--- a/lib/NGT/Graph.cpp
+++ b/lib/NGT/Graph.cpp
@@ -213,6 +213,21 @@ NeighborhoodGraph::Search::lorentzFloat16(NeighborhoodGraph &graph, NGT::SearchC
   graph.searchReadOnlyGraph<PrimitiveComparator::LorentzFloat16, DistanceCheckedSet>(sc, seeds);
 }
 #endif
+void
+NeighborhoodGraph::Search::l2Qsint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds)
+{ // forwards to the read-only graph search instantiated with the L2Qsint8 comparator
+  graph.searchReadOnlyGraph<PrimitiveComparator::L2Qsint8, DistanceCheckedSet>(sc, seeds);
+}
+void
+NeighborhoodGraph::Search::innerProductQsint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds)
+{ // forwards to the read-only graph search instantiated with the InnerProductQsint8 comparator
+  graph.searchReadOnlyGraph<PrimitiveComparator::InnerProductQsint8, DistanceCheckedSet>(sc, seeds);
+}
+void
+NeighborhoodGraph::Search::normalizedCosineSimilarityQsint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds)
+{ // forwards to the read-only graph search instantiated with the NormalizedCosineSimilarityQsint8 comparator
+  graph.searchReadOnlyGraph<PrimitiveComparator::NormalizedCosineSimilarityQsint8, DistanceCheckedSet>(sc, seeds);
+}
 ////
 
 void
@@ -360,14 +375,34 @@ NeighborhoodGraph::Search::lorentzFloat16ForLargeDataset(NeighborhoodGraph &grap
   graph.searchReadOnlyGraph<PrimitiveComparator::LorentzFloat16, DistanceCheckedSetForLargeDataset>(sc, seeds);
 }
 #endif
-
+void
+NeighborhoodGraph::Search::l2Qsint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds)
+{ // same search with the L2Qsint8 comparator, but using DistanceCheckedSetForLargeDataset as the check list
+  graph.searchReadOnlyGraph<PrimitiveComparator::L2Qsint8, DistanceCheckedSetForLargeDataset>(sc, seeds);
+}
+void
+NeighborhoodGraph::Search::innerProductQsint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds)
+{ // same search with the InnerProductQsint8 comparator, but using DistanceCheckedSetForLargeDataset
+  graph.searchReadOnlyGraph<PrimitiveComparator::InnerProductQsint8, DistanceCheckedSetForLargeDataset>(sc, seeds);
+}
+void
+NeighborhoodGraph::Search::normalizedCosineSimilarityQsint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds)
+{ // same search with the NormalizedCosineSimilarityQsint8 comparator, but using DistanceCheckedSetForLargeDataset
+  graph.searchReadOnlyGraph<PrimitiveComparator::NormalizedCosineSimilarityQsint8, DistanceCheckedSetForLargeDataset>(sc, seeds);
+}
 #endif
 
 void
 NeighborhoodGraph::setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds)
 {
-  ObjectRepository &objectRepository = getObjectRepository();
   NGT::ObjectSpace::Comparator &comparator = objectSpace->getComparator();
+  setupDistances(sc, seeds, comparator);
+}
+
+void
+NeighborhoodGraph::setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds, NGT::ObjectSpace::Comparator &comp)
+{
+  ObjectRepository &objectRepository = getObjectRepository();
   ObjectDistances tmp;
   tmp.reserve(seeds.size());
   size_t seedSize = seeds.size();
@@ -401,9 +436,9 @@ NeighborhoodGraph::setupDistances(NGT::SearchContainer &sc, ObjectDistances &see
       continue;
     }
 #if defined(NGT_SHARED_MEMORY_ALLOCATOR)
-    seeds[i].distance = comparator(sc.object, *objectRepository.get(seeds[i].id));
+    seeds[i].distance = comp(sc.object, *objectRepository.get(seeds[i].id));
 #else
-    seeds[i].distance = comparator(sc.object, *objects[seeds[i].id]);
+    seeds[i].distance = comp(sc.object, *objects[seeds[i].id]);
 #endif
   }
 
@@ -519,6 +554,98 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds,
 
 #ifdef NGT_GRAPH_READ_ONLY_GRAPH
 
+#ifdef NGT_GRAPH_COMPACT_READ_ONLY_GRAPH
+  // Read-only graph search for the compact layout, where a node stores only neighbor IDs and
+  // every neighbor object is fetched from the object repository during the search.
+  // COMPARATOR provides the static compare() distance function; CHECK_LIST is the checked-ID set.
+  // The result is written to sc.getResult() when a result buffer is set, else to sc.workingResult.
+  template <typename COMPARATOR, typename CHECK_LIST>
+  void
+    NeighborhoodGraph::searchReadOnlyGraph(NGT::SearchContainer &sc, ObjectDistances &seeds)
+  {
+    if (sc.explorationCoefficient == 0.0) {
+      sc.explorationCoefficient = NGT_EXPLORATION_COEFFICIENT;
+    }
+    // setup edgeSize
+    size_t edgeSize = getEdgeSize(sc);
+
+    UncheckedSet unchecked;
+    CHECK_LIST distanceChecked(searchRepository.size());
+    ResultSet results;
+
+    setupDistances(sc, seeds, COMPARATOR::compare);
+    setupSeeds(sc, seeds, results, unchecked, distanceChecked);
+
+    Distance explorationRadius = sc.explorationCoefficient * sc.radius;
+    const size_t dimension = objectSpace->getPaddedDimension();
+    ReadOnlyGraphNode *nodes = &searchRepository.front();
+    ObjectRepository &objectRepository = getObjectRepository();  // loop-invariant, looked up once
+    ObjectDistance result;
+    ObjectDistance target;
+    const size_t prefetchSize = objectSpace->getPrefetchSize();
+    const size_t prefetchOffset = objectSpace->getPrefetchOffset();
+    // Not-yet-checked neighbors (ID, object) of the node being expanded. A std::vector reused
+    // across iterations replaces the former variable-length array, which is not standard C++.
+    std::vector<std::pair<uint32_t, PersistentObject*>> nsPtrs;
+    while (!unchecked.empty()) {
+      target = unchecked.top();
+      unchecked.pop();
+      if (target.distance > explorationRadius) {
+        break;
+      }
+      ReadOnlyGraphNode &neighbors = nodes[target.id];
+      const size_t neighborSize = neighbors.size() < edgeSize ? neighbors.size() : edgeSize;
+      nsPtrs.clear();
+      // Pass 1: mark unchecked neighbors, collect them, and prefetch the first prefetchOffset objects.
+      for (size_t ni = 0; ni < neighborSize; ++ni) {
+        const uint32_t neighborID = neighbors[ni];
+#ifdef NGT_VISIT_COUNT
+        sc.visitCount++;
+#endif
+        if (distanceChecked[neighborID]) {
+          continue;
+        }
+        distanceChecked.insert(neighborID);
+        PersistentObject *neighborObject = objectRepository.get(neighborID);
+        if (nsPtrs.size() < prefetchOffset) {
+          MemoryCache::prefetch(reinterpret_cast<unsigned char*>(neighborObject), prefetchSize);
+        }
+        nsPtrs.push_back(std::make_pair(neighborID, neighborObject));
+      }
+      // Pass 2: compute distances, prefetching the entry prefetchOffset positions ahead.
+      const size_t nsPtrsSize = nsPtrs.size();
+      for (size_t idx = 0; idx < nsPtrsSize; idx++) {
+        if (idx + prefetchOffset < nsPtrsSize) {
+          unsigned char *ptr = reinterpret_cast<unsigned char*>(nsPtrs[idx + prefetchOffset].second);
+          MemoryCache::prefetch(ptr, prefetchSize);
+        }
+#ifdef NGT_DISTANCE_COMPUTATION_COUNT
+        sc.distanceComputationCount++;
+#endif
+        Distance distance = COMPARATOR::compare((void*)&sc.object[0],
+                                                (void*)&(*static_cast<PersistentObject*>(nsPtrs[idx].second))[0], dimension);
+        if (distance <= explorationRadius) {
+          result.set(nsPtrs[idx].first, distance);
+          unchecked.push(result);
+          if (distance <= sc.radius) {
+            results.push(result);
+            if (results.size() > sc.size) {
+              results.pop();
+              sc.radius = results.top().distance;
+              explorationRadius = sc.explorationCoefficient * sc.radius;
+            }
+          }
+        }
+      }
+    }
+    if (sc.resultIsAvailable()) {
+      ObjectDistances &qresults = sc.getResult();
+      qresults.moveFrom(results);
+    } else {
+      sc.workingResult = std::move(results);
+    }
+  }
+#else // NGT_GRAPH_COMPACT_READ_ONLY_GRAPH
   template <typename COMPARATOR, typename CHECK_LIST>
   void
     NeighborhoodGraph::searchReadOnlyGraph(NGT::SearchContainer &sc, ObjectDistances &seeds)
@@ -607,6 +734,7 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds,
     }
 
   }
+#endif // NGT_GRAPH_COMPACT_READ_ONLY_GRAPH
 
 #endif
 
@@ -616,7 +744,6 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds,
     if (sc.explorationCoefficient == 0.0) {
       sc.explorationCoefficient = NGT_EXPLORATION_COEFFICIENT;
     }
-
     // setup edgeSize
     size_t edgeSize = getEdgeSize(sc);
 
@@ -634,10 +761,16 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds,
 #endif
 
     ResultSet results;
-    setupDistances(sc, seeds);
+    NGT::ObjectSpace::Comparator *comparatorPtr = 0;
+    if (sc.insertion) {
+      comparatorPtr = &objectSpace->getComparator();
+    } else {
+      comparatorPtr = &objectSpace->getComparatorForSearch();
+    }
+    NGT::ObjectSpace::Comparator &comparator = *comparatorPtr;
+    setupDistances(sc, seeds, comparator);
     setupSeeds(sc, seeds, results, unchecked, distanceChecked);
     Distance explorationRadius = sc.explorationCoefficient * sc.radius;
-    NGT::ObjectSpace::Comparator &comparator = objectSpace->getComparator();
     ObjectRepository &objectRepository = getObjectRepository();
     const size_t prefetchSize = objectSpace->getPrefetchSize();
     ObjectDistance result;
@@ -801,7 +934,7 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds,
 	  continue;
 	}
 	objtbl.push_back(getObjectRepository().get((*i).id));
-	GraphNode *n = 0;
+	GraphNode *n = 0;	
 	try {
 	  n = getNode((*i).id);
 	} catch (Exception &err) {
diff --git a/lib/NGT/Graph.h b/lib/NGT/Graph.h
index be2b651..b0dbaa2 100644
--- a/lib/NGT/Graph.h
+++ b/lib/NGT/Graph.h
@@ -189,6 +189,9 @@ namespace NGT {
     };
 
 #ifdef NGT_GRAPH_READ_ONLY_GRAPH
+#ifdef NGT_GRAPH_COMPACT_READ_ONLY_GRAPH
+    typedef std::vector<uint32_t> ReadOnlyGraphNode;
+#else
     class ReadOnlyGraphNode : public std::vector<std::pair<uint32_t, PersistentObject*>> {
       typedef std::vector<std::pair<uint32_t, PersistentObject*>> PARENT;
     public:
@@ -219,6 +222,7 @@ namespace NGT {
       size_t reservedSize;
       size_t usedSize;
     };
+#endif // NGT_GRAPH_COMPACT_READ_ONLY_GRAPH
 
     class SearchGraphRepository : public std::vector<ReadOnlyGraphNode> {
     public:
@@ -252,7 +256,11 @@ namespace NGT {
 	      }
 #else
 	      for (auto ni = node.begin(); ni != node.end(); ni++) {
+#ifdef NGT_GRAPH_COMPACT_READ_ONLY_GRAPH
+		searchNode.push_back((*ni).id);
+#else
 		searchNode.push_back(std::pair<uint32_t, Object*>((*ni).id, objectRepository.get((*ni).id)));
+#endif // NGT_GRAPH_COMPACT_READ_ONLY_GRAPH
 	      }
 #endif
 	    }
@@ -339,6 +347,14 @@ namespace NGT {
 	      }
 	      break;
 #endif
+	    case NGT::ObjectSpace::Qsuint8:
+	      switch (dtype) {
+	      case NGT::ObjectSpace::DistanceTypeL2 : 	            return l2Qsint8;
+	      case NGT::ObjectSpace::DistanceTypeInnerProduct :	    return innerProductQsint8;
+	      case NGT::ObjectSpace::DistanceTypeNormalizedCosine : return normalizedCosineSimilarityQsint8;
+	      default : 				            return l2Qsint8;
+	      }
+	      break;
 	    default:
 	      NGTThrowException("NGT::Graph::Search: Not supported object type.");
 	      break;
@@ -386,6 +402,14 @@ namespace NGT {
 	      default:						    return l2Float16ForLargeDataset;
 	      }
 #endif
+	    case NGT::ObjectSpace::Qsuint8:
+	      switch (dtype) {
+	      case NGT::ObjectSpace::DistanceTypeL2 : 	            return l2Qsint8ForLargeDataset;
+	      case NGT::ObjectSpace::DistanceTypeInnerProduct :     return innerProductQsint8ForLargeDataset;
+	      case NGT::ObjectSpace::DistanceTypeNormalizedCosine : return normalizedCosineSimilarityQsint8ForLargeDataset;
+	      default : 				            return l2Qsint8ForLargeDataset;
+	      }
+	      break;
 	    default:
 	      NGTThrowException("NGT::Graph::Search: Not supported object type.");
 	      break;
@@ -419,7 +443,9 @@ namespace NGT {
 	static void poincareFloat16(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds);  // added by Nyapicom
 	static void lorentzFloat16(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds);  // added by Nyapicom
 #endif
-
+	static void l2Qsint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds);
+	static void innerProductQsint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds);
+	static void normalizedCosineSimilarityQsint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds);
 	static void l1Uint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds);
 	static void l2Uint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds);
 	static void l1FloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds);
@@ -446,6 +472,9 @@ namespace NGT {
 	static void poincareFloat16ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds);
 	static void lorentzFloat16ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds);
 #endif
+	static void l2Qsint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds);
+	static void innerProductQsint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds);
+	static void normalizedCosineSimilarityQsint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds);
       };
 #endif
 
@@ -599,7 +628,7 @@ namespace NGT {
       NeighborhoodGraph(): objectSpace(0) {
 	property.truncationThreshold = NGT_TRUNCATION_THRESHOLD;
 	// initialize random to generate random seeds
-#ifdef NGT_DISABLE_SRAND_FOR_RANDOM
+#ifdef NGT_ENABLE_TIME_SEED_FOR_RANDOM
 	struct timeval randTime;
 	gettimeofday(&randTime, 0);
 	srand(randTime.tv_usec);
@@ -920,7 +949,7 @@ namespace NGT {
 #elif defined(NGT_GRAPH_CHECK_VECTOR)
       typedef BooleanVector DistanceCheckedSet;
 #elif defined(NGT_GRAPH_CHECK_HASH_BASED_BOOLEAN_SET)
-      typedef HashBasedBooleanSet DistanceCheckedSet;
+      typedef HashBasedBooleanSet<uint32_t> DistanceCheckedSet;
 #else
       class DistanceCheckedSet : public unordered_set<ObjectID> {
       public:
@@ -928,7 +957,7 @@ namespace NGT {
       };
 #endif
 
-      typedef HashBasedBooleanSet DistanceCheckedSetForLargeDataset;
+      typedef HashBasedBooleanSet<uint32_t> DistanceCheckedSetForLargeDataset;
 
       class NodeWithPosition : public ObjectDistance {
        public:
@@ -953,6 +982,7 @@ namespace NGT {
 #endif
 #endif
       void setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds);
+      void setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds, NGT::ObjectSpace::Comparator &comp);
       void setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds, double (&comparator)(const void*, const void*, size_t));
 
       void setupSeeds(SearchContainer &sc, ObjectDistances &seeds, ResultSet &results,
diff --git a/lib/NGT/GraphOptimizer.h b/lib/NGT/GraphOptimizer.h
index d3ea102..9f73a37 100644
--- a/lib/NGT/GraphOptimizer.h
+++ b/lib/NGT/GraphOptimizer.h
@@ -79,6 +79,7 @@ namespace NGT {
       prefetchParameterOptimization = true;
       accuracyTableGeneration = true;
       shortcutReductionWithLessMemory = false;
+      undirectedGraphConversion = false;
       numOfThreads = 0;
     }
 
@@ -109,10 +110,18 @@ namespace NGT {
       NGT::ObjectSpace &objectSpace = index.getObjectSpace();
       NGT::ObjectRepository &objectRepository = objectSpace.getRepository();
       size_t nQueries = 200;
+      if (objectRepository.size() == 0) {
+	std::stringstream msg;
+	msg << "The object repository is empty. " << objectRepository.size();
+	NGTThrowException(msg);
+      }
       nQueries = objectRepository.size() - 1 < nQueries ? objectRepository.size() - 1 : nQueries;
-
+      if (nQueries == 0) {
+	std::stringstream msg;
+	msg << "# of the queries is unexpected zero value. " << nQueries << ":" << objectRepository.size();
+	NGTThrowException(msg);
+      }
       size_t step = objectRepository.size() / nQueries;
-      assert(step != 0);
       std::vector<size_t> ids;
       for (size_t startID = start; startID < step; startID++) {
 	for (size_t id = startID; id < objectRepository.size(); id += step) {
@@ -311,7 +320,7 @@ namespace NGT {
 	    // extract only edges from the index to reduce the memory usage.
 	    NGT::GraphReconstructor::extractGraph(graph, *graphIndex);
 	    NeighborhoodGraph::Property &prop = graphIndex->getGraphProperty();
-	    if (prop.graphType == NGT::NeighborhoodGraph::GraphTypeONNG) {
+	    if (undirectedGraphConversion) {
 	      NGT::GraphReconstructor::convertToANNG(graph);
 	    }
 	    NGT::GraphReconstructor::reconstructGraph(graph, *graphIndex, numOfOutgoingEdges, numOfIncomingEdges, maxNumOfEdges);
@@ -390,7 +399,7 @@ namespace NGT {
       if (searchParameterOptimization || prefetchParameterOptimization || accuracyTableGeneration) {
 	NGT::StdOstreamRedirector redirector(logDisabled);
 	redirector.begin();
-	NGT::Index	outIndex(outIndexPath, true);
+	NGT::Index	outIndex(outIndexPath, true);	
 	NGT::GraphIndex	&outGraph = static_cast<NGT::GraphIndex&>(outIndex.getIndex());
 	if (prefetchParameterOptimization) {
 	  if (!logDisabled) {
@@ -704,6 +713,7 @@ namespace NGT {
     bool prefetchParameterOptimization;
     bool accuracyTableGeneration;
     bool shortcutReductionWithLessMemory;
+    bool undirectedGraphConversion;
     float shortcutReductionRange;
     size_t numOfThreads;
   };
diff --git a/lib/NGT/GraphReconstructor.h b/lib/NGT/GraphReconstructor.h
index da48895..a78f810 100644
--- a/lib/NGT/GraphReconstructor.h
+++ b/lib/NGT/GraphReconstructor.h
@@ -26,7 +26,7 @@
 #warning "*** OMP is *NOT* available! ***"
 #endif
 
-//#define NGT_SHORTCUT_REDUCTION_WITH_ANGLE
+#define NGT_SHORTCUT_REDUCTION_WITH_ANGLE
 //#define NGT_SHORTCUT_REDUCTION_WITH_ADDITIONAL_CONDITION
 
 namespace NGT {
@@ -248,7 +248,7 @@ class GraphReconstructor {
       auto it = tmpGraph.begin() + idx;
       size_t id = idx + 1;
       try {
-	NGT::GraphNode &srcNode = *it;
+	NGT::GraphNode &srcNode = *it;	
 	std::unordered_map<uint32_t, std::pair<uint32_t, float>> neighbors;
 	for (uint32_t sni = 0; sni < srcNode.size(); ++sni) {
 #if defined(NGT_SHARED_MEMORY_ALLOCATOR)
@@ -559,11 +559,11 @@ class GraphReconstructor {
 
     for (size_t id = 1; id < outGraph.repository.size(); id++) {
       try {
-	NGT::GraphNode &node = *outGraph.getNode(id);
 #if defined(NGT_SHARED_MEMORY_ALLOCATOR)
 	std::cerr << "Not implemented yet." << std::endl;
 	abort();
 #else
+	NGT::GraphNode &node = *outGraph.getNode(id);
 	node.erase(std::remove_if(node.begin(), node.end(), [](NGT::ObjectDistance &n){ return (n.id & 0x80000000) != 0; }), node.end());
 #endif
       } catch(...) {}
@@ -833,7 +833,7 @@ class GraphReconstructor {
     }
     NGT::GraphIndex::showStatisticsOfGraph(outGraph);
 
-    std::vector<ObjectDistances> reverse(graph.size() + 1);
+    std::vector<ObjectDistances> reverse(graph.size() + 1);	
     for (size_t id = 1; id <= graph.size(); ++id) {
       try {
 	NGT::GraphNode &node = graph[id - 1];
@@ -849,15 +849,15 @@ class GraphReconstructor {
       }
     }
 
-    std::vector<std::pair<size_t, size_t> > reverseSize(graph.size() + 1);
+    std::vector<std::pair<size_t, size_t> > reverseSize(graph.size() + 1);	
     reverseSize[0] = std::pair<size_t, size_t>(0, 0);
     for (size_t rid = 1; rid <= graph.size(); ++rid) {
       reverseSize[rid] = std::pair<size_t, size_t>(reverse[rid].size(), rid);
     }
-    std::sort(reverseSize.begin(), reverseSize.end());
+    std::sort(reverseSize.begin(), reverseSize.end());		
 
 
-    std::vector<uint32_t> indegreeCount(graph.size(), 0);
+    std::vector<uint32_t> indegreeCount(graph.size(), 0);	
     size_t zeroCount = 0;
     for (size_t sizerank = 0; sizerank <= reverseSize.size(); sizerank++) {
 
@@ -865,17 +865,17 @@ class GraphReconstructor {
 	zeroCount++;
 	continue;
       }
-      size_t rid = reverseSize[sizerank].second;
-      ObjectDistances &rnode = reverse[rid];
+      size_t rid = reverseSize[sizerank].second;	
+      ObjectDistances &rnode = reverse[rid];		
       for (auto rni = rnode.begin(); rni != rnode.end(); ++rni) {
-	if (indegreeCount[(*rni).id] >= reverseEdgeSize) {
+	if (indegreeCount[(*rni).id] >= reverseEdgeSize) {	
 	  continue;
 	}
-	NGT::GraphNode &node = *outGraph.getNode(rid);
+	NGT::GraphNode &node = *outGraph.getNode(rid);	
 	if (indegreeCount[(*rni).id] > 0 && node.size() >= originalEdgeSize) {
 	  continue;
 	}
-
+	
 	node.push_back(NGT::ObjectDistance((*rni).id, (*rni).distance));
 	indegreeCount[(*rni).id]++;
       }
diff --git a/lib/NGT/HashBasedBooleanSet.h b/lib/NGT/HashBasedBooleanSet.h
index 2495824..077f8c4 100644
--- a/lib/NGT/HashBasedBooleanSet.h
+++ b/lib/NGT/HashBasedBooleanSet.h
@@ -22,16 +22,16 @@
 #include <climits>
 #include <unordered_set>
 
-class HashBasedBooleanSet{
+template <typename TYPE> class HashBasedBooleanSet{
  private:
-  uint32_t *_table;
+  TYPE *_table;
   uint32_t _tableSize;
   uint32_t _mask;
   
-  std::unordered_set<uint32_t> _stlHash;
+  std::unordered_set<TYPE> _stlHash;
   
   
-  inline uint32_t _hash1(const uint32_t value){
+  inline uint32_t _hash1(const TYPE value){
     return value & _mask;
   }
   
@@ -56,8 +56,8 @@ class HashBasedBooleanSet{
       std::cerr << "[WARN] table size is not 2^N :  " <<  tableSize << std::endl;
     }
     
-    _table = new uint32_t[tableSize];
-    memset(_table, 0, tableSize * sizeof(uint32_t));
+    _table = new TYPE[tableSize];
+    memset(_table, 0, tableSize * sizeof(TYPE));
   }
   
   ~HashBasedBooleanSet(){
@@ -65,7 +65,7 @@ class HashBasedBooleanSet{
     _stlHash.clear();
   }
   
-  inline bool operator[](const uint32_t num){
+  inline bool operator[](const TYPE num){
     const uint32_t hashValue = _hash1(num);
     
     auto v = _table[hashValue];
@@ -81,8 +81,8 @@ class HashBasedBooleanSet{
     return true;
   }
   
-  inline void set(const uint32_t num){
-    uint32_t &value = _table[_hash1(num)];
+  inline void set(const TYPE num){
+    TYPE &value = _table[_hash1(num)];
     if(value == 0){
       value = num;
     }else{
@@ -92,11 +92,11 @@ class HashBasedBooleanSet{
     }
   }
   
-  inline void insert(const uint32_t num){
+  inline void insert(const TYPE num){
     set(num);
   }
 
-  inline void reset(const uint32_t num){
+  inline void reset(const TYPE num){
     const uint32_t hashValue = _hash1(num);
     if(_table[hashValue] != 0){
       if(_table[hashValue] != num){
diff --git a/lib/NGT/Index.cpp b/lib/NGT/Index.cpp
index 4402760..a09f51d 100644
--- a/lib/NGT/Index.cpp
+++ b/lib/NGT/Index.cpp
@@ -23,6 +23,7 @@
 #include	"NGT/Thread.h"
 #include	"NGT/GraphReconstructor.h"
 #include	"NGT/Version.h"
+#include	"NGT/NGTQ/ObjectFile.h"
 
 using namespace std;
 using namespace NGT;
@@ -246,6 +247,494 @@ NGT::Index::append(const string &database, const float *data, size_t dataSize, s
   return;
 }
 
+// Open the index at indexPath, append its refinement objects, then create, save and close it.
+void NGT::Index::appendFromRefinementObjectFile(const std::string &indexPath) {
+  NGT::Index target(indexPath);
+  target.appendFromRefinementObjectFile();
+  target.createIndex();
+  target.save();
+  target.close();
+}
+
+
+// Append every object in the refinement object repository to the object repository.
+// Inner-product indexes get sqrt(maxMag - squared magnitude) appended to each object, and for
+// qint object types with clippingRate >= 0 the quantization range is derived from the data first.
+void 
+NGT::Index::appendFromRefinementObjectFile() {
+  NGT::Property prop;
+  getProperty(prop);
+  float maxMag = prop.maxMagnitude;
+  const bool maxMagSkip = maxMag > 0.0;  // a preset max magnitude is never overwritten
+  auto &ros = getRefinementObjectSpace();
+  auto &rrepo = ros.getRepository();
+  size_t dim = getDimension();
+  auto dataSize = rrepo.size();
+  std::vector<float> addedElement(dataSize);  // squared magnitude per object, indexed by object ID
+  if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) {
+    // Pass 1: compute the squared magnitudes and, unless preset, their maximum.
+    NGT::Timer timer;
+    timer.start();
+    for (size_t idx = 1; idx < rrepo.size(); idx++) {
+      if (rrepo[idx] == 0) {
+	continue;
+      }
+      std::vector<float> object;
+      ros.getObject(idx, object);
+      if (object.size() != dim) {
+	if (object.size() == dim + 1) {
+	  object.resize(dim);
+	} else {
+	  std::stringstream msg;
+	  msg << "Fatal inner error! iInvalid dimension. " << dim << ":" << object.size();;
+	  NGTThrowException(msg);
+	}
+      }
+      double mag = 0.0;
+      for (auto &v : object) {
+	mag += v * v;
+      }
+      if (!maxMagSkip && mag > maxMag) {
+	maxMag = mag;
+      }
+      addedElement[idx] = mag;
+      if (idx % 2000000 == 0) {
+	timer.stop();
+	std::cerr << "processed " << static_cast<float>(idx) / 1000000.0 << "M objects."
+		  << " maxMag=" << maxMag << " time=" << timer << std::endl;
+	timer.restart();
+      }
+    }
+    timer.stop();
+    std::cerr << "time=" << timer << std::endl;
+    std::cerr << "maxMag=" << maxMag << std::endl;
+    std::cerr << "dataSize=" << dataSize << std::endl;
+    if (static_cast<NGT::GraphIndex&>(getIndex()).property.maxMagnitude <= 0.0 && maxMag > 0.0) {
+      static_cast<NGT::GraphIndex&>(getIndex()).property.maxMagnitude = maxMag;
+    }
+  }
+
+  if (getObjectSpace().isQintObjectType() && prop.clippingRate >= 0.0) {
+    // Pass 2: collect the clippingSize largest and smallest values to pick the quantization range.
+    std::priority_queue<float> min;
+    std::priority_queue<float, vector<float>, std::greater<float>> max;
+    {
+      NGT::Timer timer;
+      timer.start();
+      auto clippingSize = static_cast<float>(dataSize * dim) * prop.clippingRate;
+      clippingSize = clippingSize == 0 ? 1 : clippingSize;
+      size_t counter = 0;
+      for (size_t idx = 1; idx < rrepo.size(); idx++) {
+	if (rrepo[idx] == 0) continue;
+	std::vector<float> object;
+	ros.getObject(idx, object);
+	if (object.size() != dim) object.resize(dim);
+	if (getObjectSpace().isNormalizedDistance()) {
+	  ObjectSpace::normalize(object);
+	}
+	if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) {
+	  float v = maxMag - addedElement[idx];
+	  object.emplace_back(sqrt(v >= 0.0 ? v : 0.0));
+	}
+	for (auto &v : object) {
+	  if (max.size() < clippingSize) {
+	    max.push(v);
+	  } else if (max.top() <= v) {
+	    max.push(v);
+	    max.pop();
+	  }
+	  if (min.size() < clippingSize) {
+	    min.push(v);
+	  } else if (min.top() >= v) {
+	    min.push(v);
+	    min.pop();
+	  }
+	}
+	counter++;
+      }
+      timer.stop();  // stop before printing, as pass 1 does
+      std::cerr << "time=" << timer << std::endl;
+      if (counter != 0) {
+	std::cerr << "max:min=" << max.top() << ":" << min.top() << std::endl;
+	setQuantizationFromMaxMin(max.top(), min.top());
+      }
+    }
+  }
+  {
+    // Pass 3: append the objects; object IDs must stay aligned with the refinement repository.
+    for (size_t idx = 1; idx < rrepo.size(); idx++) {
+      if (rrepo[idx] == 0) continue;
+      std::vector<float> object;
+      ros.getObject(idx, object);
+      if (object.size() != dim) object.resize(dim);
+      if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) {
+	float v = maxMag - addedElement[idx];  // negative when a preset maxMag is exceeded
+	object.emplace_back(sqrt(v >= 0.0 ? v : 0.0));  // clamp so that sqrt never yields NaN
+      }
+      append(object);
+      if (idx + 1 != getObjectRepositorySize()) {
+	std::stringstream msg;
+	msg << "The object repository and refinement repository are inconsistent. " << idx + 1 << ":" << getObjectRepositorySize();
+	NGTThrowException(msg);
+      }
+    }
+  }
+}
+
+// Insert into the object repository every refinement object whose ID has no object there yet.
+// Raises via NGTThrowException on a dimension mismatch, a failed insert or inconsistent repositories.
+void 
+NGT::Index::insertFromRefinementObjectFile() {
+  NGT::Property prop;
+  getProperty(prop);
+  float maxMag = prop.maxMagnitude;
+  if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct && prop.maxMagnitude <= 0.0) {
+    std::stringstream msg;
+    msg << "Max magnitude is not set yet. " << maxMag;
+    NGTThrowException(msg);  // maxMag is only needed for the inner-product extra element
+  }
+  auto &ros = getRefinementObjectSpace();
+  auto &rrepo = ros.getRepository();
+  auto &repo = getObjectSpace().getRepository();
+  size_t dim = getDimension();
+
+  for (size_t idx = 1; idx < rrepo.size(); idx++) {
+    if (rrepo[idx] == 0) continue;
+    if (repo.size() > idx && repo[idx] != 0) continue;  // already present in the object repository
+    std::vector<float> object;
+    ros.getObject(idx, object);
+    if (object.size() != dim) {
+      if (object.size() == dim + 1) {
+	object.resize(dim);
+      } else {
+	std::stringstream msg;
+	msg << "Fatal inner error! iInvalid dimension. " << dim << ":" << object.size();;
+	NGTThrowException(msg);
+      }
+    }
+    if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) {
+      double mag = 0.0;
+      for (auto &v : object) {
+	mag += v * v;
+      }
+      if (mag > maxMag) {
+	maxMag = mag;  // only the local copy grows, which keeps the sqrt argument non-negative
+      }
+      object.emplace_back(sqrt(maxMag - mag));
+    }
+    try {
+      insert(idx, object);
+    } catch(NGT::Exception &err) {
+      std::stringstream msg;
+      msg << "Cannot insert. " << idx << " " << err.what();
+      NGTThrowException(msg);
+    }
+    if (idx + 1 > getObjectRepositorySize()) {
+      std::stringstream msg;
+      msg << "The object repository and refinement repository are inconsistent. " << idx + 1 << ":" << getObjectRepositorySize();
+      NGTThrowException(msg);
+    }
+  }
+}
+
+// Open the index at indexPath, load objects from the text file data, then create, save and close it.
+void 
+NGT::Index::appendFromTextObjectFile(const std::string &indexPath, const std::string &data, size_t dataSize,
+				     bool append, bool refinement) {
+  NGT::Index target(indexPath);
+  // The loading itself is done by the member overload; all arguments are passed through unchanged.
+  target.appendFromTextObjectFile(data, dataSize, append, refinement);
+  target.createIndex();
+  target.save();
+  target.close();
+}
+
+// Load objects from the text file data (one object per line; values separated by tab, comma or space).
+// append adds them to the object repository; refinement (under NGT_REFINEMENT) calls appendToRefinement.
+// When dataSize > 0 at most dataSize objects are read. Raises via NGTThrowException on bad input.
+void 
+NGT::Index::appendFromTextObjectFile(const std::string &data, size_t dataSize, bool append, bool refinement) {
+  NGT::Property prop;
+  getProperty(prop);
+  float maxMag = prop.maxMagnitude;
+  const bool maxMagSkip = maxMag > 0.0;  // a preset max magnitude is never overwritten
+  std::vector<float> addedElement;  // squared magnitude of each object, in file order
+  size_t dim = 0;
+  if (append && prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) {
+    // Pass 1: compute the squared magnitudes and, unless preset, their maximum.
+    NGT::Timer timer;
+    timer.start();
+    ifstream is(data);
+    if (!is) {
+      std::stringstream msg;
+      msg << "Cannot open the specified data file. " << data;
+      NGTThrowException(msg);
+    }
+    std::string line;
+    size_t counter = 0;
+    while (getline(is, line)) {
+      // NOTE(review): a final line without a trailing newline sets eof here and is skipped (all passes).
+      if (is.eof()) break;
+      if (dataSize > 0 && counter >= dataSize) break;  // read exactly dataSize objects
+      vector<string> tokens;
+      NGT::Common::tokenize(line, tokens, "\t, ");
+      if (!tokens.empty() && tokens.back() == "") tokens.pop_back();
+      if (dim == 0) {
+	dim = tokens.size();
+      } else if (dim != tokens.size()) {
+	std::stringstream msg;
+	msg << "The dimensions are inconsistent. " << counter << ":" << dim << "x" << tokens.size() << " " << data;
+	NGTThrowException(msg);
+      }
+      double mag = 0.0;
+      for (auto &vstr : tokens) {
+	auto v = NGT::Common::strtof(vstr);
+	mag += v * v;
+      }
+      if (!maxMagSkip && mag > maxMag) {
+	maxMag = mag;
+      }
+      addedElement.emplace_back(mag);
+      counter++;
+      if (counter % 2000000 == 0) {
+	timer.stop();
+	std::cerr << "processed " << static_cast<float>(counter) / 1000000.0 << "M objects."
+		  << " maxMag=" << maxMag << " time=" << timer << std::endl;
+	timer.restart();
+      }
+    }
+    timer.stop();
+    dataSize = counter;  // later passes read the same number of objects as addedElement holds
+    std::cerr << "time=" << timer << std::endl;
+    std::cerr << "maxMag=" << maxMag << std::endl;
+    std::cerr << "dataSize=" << dataSize << std::endl;
+    if (static_cast<NGT::GraphIndex&>(getIndex()).property.maxMagnitude <= 0.0 && maxMag > 0.0) {
+      static_cast<NGT::GraphIndex&>(getIndex()).property.maxMagnitude = maxMag;
+    }
+  }
+  if (append && getObjectSpace().isQintObjectType() && prop.clippingRate >= 0.0) {
+    // Pass 2: collect the clippingSize largest and smallest values to pick the quantization range.
+    std::priority_queue<float> min;
+    std::priority_queue<float, vector<float>, std::greater<float>> max;
+    {
+      NGT::Timer timer;
+      timer.start();
+      ifstream is(data);
+      if (!is) {
+	std::stringstream msg;
+	msg << "Cannot open the specified data file. " << data;
+	NGTThrowException(msg);
+      }
+      auto clippingSize = static_cast<float>(dataSize * dim) * prop.clippingRate;  // NOTE(review): dim is only set in pass 1, so it is 0 here for other distance types
+      clippingSize = clippingSize == 0 ? 1 : clippingSize;
+      std::string line;
+      size_t counter = 0;
+      while (getline(is, line)) {
+	if (is.eof()) break;
+	if (dataSize > 0 && counter >= dataSize) break;
+	vector<float> object;
+	vector<string> tokens;
+	NGT::Common::tokenize(line, tokens, "\t, ");
+	if (!tokens.empty() && tokens.back() == "") tokens.pop_back();
+	for (auto &vstr : tokens) {
+	  auto v = NGT::Common::strtof(vstr);
+	  object.emplace_back(v);
+	}
+	if (getObjectSpace().isNormalizedDistance()) {
+	  ObjectSpace::normalize(object);
+	}
+	if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) {
+	  float v = maxMag - addedElement[counter];
+	  object.emplace_back(sqrt(v >= 0.0 ? v : 0.0));
+	}
+	for (auto &v : object) {
+	  if (max.size() < clippingSize) {
+	    max.push(v);
+	  } else if (max.top() <= v) {
+	    max.push(v);
+	    max.pop();
+	  }
+	  if (min.size() < clippingSize) {
+	    min.push(v);
+	  } else if (min.top() >= v) {
+	    min.push(v);
+	    min.pop();
+	  }
+	}
+	counter++;
+      }
+      timer.stop();  // stop before printing, as pass 1 does
+      std::cerr << "time=" << timer << std::endl;
+      if (counter != 0) {
+	std::cerr << "max:min=" << max.top() << ":" << min.top() << std::endl;
+	setQuantizationFromMaxMin(max.top(), min.top());
+      }
+    }
+  }
+  if (append || refinement) {
+    // Pass 3: parse each line again and hand the object to the requested destination(s).
+    ifstream is(data);
+    if (!is) {
+      std::stringstream msg;
+      msg << "Cannot open the specified data file. " << data;
+      NGTThrowException(msg);
+    }
+    std::string line;
+    size_t counter = 0;
+    while (getline(is, line)) {
+      if (is.eof()) break;
+      if (dataSize > 0 && counter >= dataSize) break;
+      vector<float> object;
+      vector<string> tokens;
+      NGT::Common::tokenize(line, tokens, "\t, ");
+      if (!tokens.empty() && tokens.back() == "") tokens.pop_back();
+      for (auto &vstr : tokens) {
+	auto v = NGT::Common::strtof(vstr);
+	object.emplace_back(v);
+      }
+#ifdef NGT_REFINEMENT
+      if (refinement) {
+	appendToRefinement(object);
+      }
+#endif
+      if (append) {
+	if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct && maxMag > 0.0) {
+	  float v = maxMag - addedElement[counter];
+	  object.emplace_back(sqrt(v >= 0.0 ? v : 0.0));
+	}
+	NGT::Index::append(object);
+      }
+      counter++;
+    }
+  }
+}
+
+
+// Open the index at indexPath, load objects from the binary file data, then create, save and close it.
+void NGT::Index::appendFromBinaryObjectFile(const std::string &indexPath, const std::string &data,
+					    size_t dataSize, bool append, bool refinement) {
+  NGT::Index target(indexPath);
+  target.appendFromBinaryObjectFile(data, dataSize, append, refinement);
+  target.createIndex();
+  target.save();
+  target.close();
+}
+
+// Load objects from the binary file data through StaticObjectFileLoader; append adds them to the object
+// repository, refinement (under NGT_REFINEMENT) calls appendToRefinement. dataSize > 0 caps the count.
+void
+NGT::Index::appendFromBinaryObjectFile(const std::string &data, size_t dataSize, bool append, bool refinement) {
+  NGT::Property prop;
+  getProperty(prop);
+  float maxMag = prop.maxMagnitude;
+  const bool maxMagSkip = maxMag > 0.0;  // a preset max magnitude is never overwritten
+  std::vector<float> addedElement;  // squared magnitude of each object, in file order
+  size_t dim = 0;
+  if (append && prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) {
+    // Pass 1: compute the squared magnitudes and, unless preset, their maximum.
+    NGT::Timer timer;
+    timer.start();
+    StaticObjectFileLoader loader(data);
+    size_t counter = 0;
+    while (!loader.isEmpty()) {
+      if (dataSize > 0 && counter >= dataSize) break;  // read exactly dataSize objects
+      auto object = loader.getObject();
+      if (dim == 0) {
+	dim = object.size();
+      } else if (dim != object.size()) {
+	std::stringstream msg;
+	msg << "The dimensions are inconsistent. " << counter << ":" << dim << "x" << object.size() << " " << data;
+	NGTThrowException(msg);
+      }
+      double mag = 0.0;
+      for (auto &v : object) {
+	mag += v * v;
+      }
+      if (!maxMagSkip && mag > maxMag) {
+	maxMag = mag;
+      }
+      addedElement.emplace_back(mag);
+      counter++;
+      if (counter % 2000000 == 0) {
+	timer.stop();
+	std::cerr << "processed " << static_cast<float>(counter) / 1000000.0 << "M objects."
+		  << " maxMag=" << maxMag << " time=" << timer << std::endl;
+	timer.restart();
+      }
+    }
+    timer.stop();
+    dataSize = counter;  // later passes read the same number of objects as addedElement holds
+    std::cerr << "time=" << timer << std::endl;
+    std::cerr << "maxMag=" << maxMag << std::endl;
+    std::cerr << "dataSize=" << dataSize << std::endl;
+    if (static_cast<NGT::GraphIndex&>(getIndex()).property.maxMagnitude <= 0.0 && maxMag > 0.0) {
+      static_cast<NGT::GraphIndex&>(getIndex()).property.maxMagnitude = maxMag;
+    }
+  }
+  if (append && getObjectSpace().isQintObjectType() && prop.clippingRate >= 0.0) {
+    std::priority_queue<float> min;  // Pass 2: the clippingSize smallest values (top is the largest of them)
+    std::priority_queue<float, vector<float>, std::greater<float>> max;
+    {
+      NGT::Timer timer;
+      timer.start();
+      auto clippingSize = static_cast<float>(dataSize * dim) * prop.clippingRate;  // NOTE(review): dim is only set in pass 1, so it is 0 here for other distance types
+      clippingSize = clippingSize == 0 ? 1 : clippingSize;
+      StaticObjectFileLoader loader(data);
+      size_t counter = 0;
+      while (!loader.isEmpty()) {
+	if (dataSize > 0 && counter >= dataSize) break;
+	auto object = loader.getObject();
+	if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) {
+	  float v = maxMag - addedElement[counter];
+	  object.emplace_back(sqrt(v >= 0.0 ? v : 0.0));
+	}
+	for (auto &v : object) {
+	  if (max.size() < clippingSize) {
+	    max.push(v);
+	  } else if (max.top() <= v) {
+	    max.push(v);
+	    max.pop();
+	  }
+	  if (min.size() < clippingSize) {
+	    min.push(v);
+	  } else if (min.top() >= v) {
+	    min.push(v);
+	    min.pop();
+	  }
+	}
+	counter++;
+      }
+      timer.stop();  // stop before printing, as pass 1 does
+      std::cerr << "time=" << timer << std::endl;
+      if (counter != 0) {
+	std::cerr << "max:min=" << max.top() << ":" << min.top() << std::endl;
+	setQuantizationFromMaxMin(max.top(), min.top());
+      }
+    }
+  }
+  if (append || refinement) {
+    StaticObjectFileLoader loader(data);  // Pass 3: hand each object to the requested destination(s)
+    size_t counter = 0;
+    while (!loader.isEmpty()) {
+      if (dataSize > 0 && counter >= dataSize) break;
+      auto object = loader.getObject();
+#ifdef NGT_REFINEMENT
+      if (refinement) {
+	appendToRefinement(object);
+      }
+#endif
+      if (append) {
+	if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) {
+	  float v = maxMag - addedElement[counter];  // negative when a preset maxMag is exceeded
+	  object.emplace_back(sqrt(v >= 0.0 ? v : 0.0));  // clamp so that sqrt never yields NaN
+	}
+	NGT::Index::append(object);
+      }
+      counter++;
+    }
+  }
+}
+
 void
 NGT::Index::remove(const string &database, vector<ObjectID> &objects, bool force) {
   NGT::Index	index(database);
@@ -344,6 +833,27 @@ NGT::Index::makeSparseObject(std::vector<uint32_t> &object)
   return obj;
 }
 
+// Derive the quantization scale and offset from the extreme data values and apply them.
+//  - qsint8 objects: the offset is zero and the scale is the larger absolute value of max and min.
+//  - other object types: the offset is min and the scale is the span max - min.
+void 
+NGT::Index::setQuantizationFromMaxMin(float max, float min) {
+  const bool symmetric = getObjectSpace().getObjectType() == typeid(NGT::qsint8);
+  if (symmetric) {
+    const float magnitude = std::max(fabs(max), fabs(min));
+    setQuantization(magnitude, 0.0);
+    return;
+  }
+  const float span = max - min;
+  setQuantization(span, min);
+}
+
+void NGT::Index::setQuantization(float scale, float offset) {  // record in the property, then forward to the object space
+  auto &indexProperty = static_cast<NGT::GraphIndex&>(getIndex()).property;
+  indexProperty.quantizationScale = scale;
+  indexProperty.quantizationOffset = offset;
+  getObjectSpace().setQuantization(scale, offset);
+}
 
 void
 NGT::Index::extractInsertionOrder(InsertionOrder &insertionOrder) {
@@ -358,26 +868,45 @@ NGT::Index::createIndex(size_t threadNumber, size_t sizeOfRepository) {
     InsertionOrder insertionOrder;
     NGT::Property prop;
     getProperty(prop);
-#ifdef NGT_INNER_PRODUCT
-    if (prop.distanceType == ObjectSpace::DistanceTypeInnerProduct) {
-      size_t beginId = 1;
-      NGT::GraphRepository &graphRepository = static_cast<NGT::GraphIndex&>(getIndex()).repository;
+    if (prop.objectType == NGT::ObjectSpace::ObjectType::Qsuint8
+	) {
+      auto &ros = getRefinementObjectSpace();
+      auto &os = getObjectSpace();
+      if (&ros != 0 && ros.getRepository().size() > os.getRepository().size()) {
+	if (os.getRepository().size() <= 1) {
+	  if (ros.getRepository().size() < 100) {
+	    std::cerr << "Warning! # of refinement objects is too small. " << ros.getRepository().size() << std::endl;
+	  }
+	  appendFromRefinementObjectFile();
+	} else {
+	  if (prop.quantizationScale <= 0.0) {
+	    stringstream msg;
+	    msg << "Fatal inner error! Scalar quantization parameters are not set yet. " << prop.quantizationScale << ":" << prop.quantizationOffset;
+	    NGTThrowException(msg);
+	  }
+	  insertFromRefinementObjectFile();
+	}
+      }
+    } else {
+      if (prop.distanceType == ObjectSpace::DistanceTypeInnerProduct) {
+	size_t beginId = 1;
+	NGT::GraphRepository &graphRepository = static_cast<NGT::GraphIndex&>(getIndex()).repository;
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
-      auto &graphNodes = static_cast<PersistentRepository<GraphNode>&>(graphRepository);
-      auto &graphNodeVector = reinterpret_cast<PersistentRepository<void>&>(graphNodes);
+	auto &graphNodes = static_cast<PersistentRepository<GraphNode>&>(graphRepository);
+	auto &graphNodeVectors = reinterpret_cast<PersistentRepository<void>&>(graphNodes);
 #else
-      auto &graphNodes = static_cast<Repository<GraphNode>&>(graphRepository);
-      auto &graphNodeVector = reinterpret_cast<Repository<void>&>(graphNodes);
+	auto &graphNodes = static_cast<Repository<GraphNode>&>(graphRepository);
+	auto &graphNodeVectors = reinterpret_cast<Repository<void>&>(graphNodes);
 #endif
-      if (prop.maxMagnitude != 0.0) {
-	getObjectSpace().setMagnitude(prop.maxMagnitude, graphNodeVector, beginId);
-      } else {
-	auto maxMag  = getObjectSpace().computeMaxMagnitude(beginId);
-	static_cast<NGT::GraphIndex&>(getIndex()).property.maxMagnitude = maxMag;
-	getObjectSpace().setMagnitude(maxMag, graphNodeVector, beginId);
+	if (prop.maxMagnitude > 0.0) {  // a max magnitude is already set: reuse it
+	  getObjectSpace().setMagnitude(prop.maxMagnitude, graphNodeVectors, beginId);
+	} else {  // not set yet (<= 0): compute it from the objects and store it in the property
+	  auto maxMag = getObjectSpace().computeMaxMagnitude(beginId);
+	  static_cast<NGT::GraphIndex&>(getIndex()).property.maxMagnitude = maxMag;
+	  getObjectSpace().setMagnitude(maxMag, graphNodeVectors, beginId);
+	}
       }
     }
-#endif
     if (prop.nOfNeighborsForInsertionOrder != 0) {
       insertionOrder.nOfNeighboringNodes = prop.nOfNeighborsForInsertionOrder;
       insertionOrder.epsilon = prop.epsilonForInsertionOrder;
@@ -412,9 +941,10 @@ NGT::Index::Property::set(NGT::Property &prop) {
   if (prop.prefetchOffset != -1) prefetchOffset = prop.prefetchOffset;
   if (prop.prefetchSize != -1) prefetchSize = prop.prefetchSize;
   if (prop.accuracyTable != "") accuracyTable = prop.accuracyTable;
-#ifdef NGT_INNER_PRODUCT
   if (prop.maxMagnitude	!= -1) maxMagnitude = prop.maxMagnitude;
-#endif
+  if (prop.quantizationScale != -1) quantizationScale = prop.quantizationScale;
+  if (prop.quantizationOffset != -1) quantizationOffset = prop.quantizationOffset;
+  if (prop.clippingRate != -1) clippingRate = prop.clippingRate;
   if (prop.nOfNeighborsForInsertionOrder != -1) nOfNeighborsForInsertionOrder = prop.nOfNeighborsForInsertionOrder;
   if (prop.epsilonForInsertionOrder != -1) epsilonForInsertionOrder = prop.epsilonForInsertionOrder;
 }
@@ -439,9 +969,10 @@ NGT::Index::Property::get(NGT::Property &prop) {
   prop.prefetchOffset = prefetchOffset;
   prop.prefetchSize = prefetchSize;
   prop.accuracyTable = accuracyTable;
-#ifdef NGT_INNER_PRODUCT
   prop.maxMagnitude = maxMagnitude;
-#endif
+  prop.quantizationScale = quantizationScale;
+  prop.quantizationOffset = quantizationOffset;
+  prop.clippingRate = clippingRate;
   prop.nOfNeighborsForInsertionOrder = nOfNeighborsForInsertionOrder;
   prop.epsilonForInsertionOrder = epsilonForInsertionOrder;
 }
@@ -574,16 +1105,10 @@ void
 NGT::GraphIndex::constructObjectSpace(NGT::Property &prop) {
   assert(prop.dimension != 0);
   size_t dimension = prop.dimension;
-#ifdef NGT_INNER_PRODUCT
   if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeSparseJaccard ||
       prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) {
     dimension++;
   }
-#else
-  if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeSparseJaccard) {
-    dimension++;
-  }
-#endif
 
   switch (prop.objectType) {
   case NGT::ObjectSpace::ObjectType::Float :
@@ -597,27 +1122,33 @@ NGT::GraphIndex::constructObjectSpace(NGT::Property &prop) {
     objectSpace = new ObjectSpaceRepository<float16, float>(dimension, typeid(float16), prop.distanceType);
     break;
 #endif
+  case NGT::ObjectSpace::ObjectType::Qsuint8 :
+    objectSpace = new ObjectSpaceRepository<qsint8, float>(dimension, typeid(qsint8), prop.distanceType);
+    break;
   default:
     stringstream msg;
     msg << "Invalid Object Type in the property. " << prop.objectType;
     NGTThrowException(msg);
   }
+  objectSpace->setQuantization(prop.quantizationScale, prop.quantizationOffset);
 #ifdef NGT_REFINEMENT
+  auto dtype = prop.distanceType;
+  dtype = dtype == ObjectSpace::DistanceTypeInnerProduct ? ObjectSpace::DistanceTypeDotProduct : prop.distanceType;
   switch (prop.refinementObjectType) {
   case NGT::ObjectSpace::ObjectType::Float :
-    refinementObjectSpace = new ObjectSpaceRepository<float, double>(dimension, typeid(float), prop.distanceType);
+    refinementObjectSpace = new ObjectSpaceRepository<float, double>(dimension, typeid(float), dtype);
     break;
   case NGT::ObjectSpace::ObjectType::Uint8 :
-    refinementObjectSpace = new ObjectSpaceRepository<unsigned char, int>(dimension, typeid(uint8_t), prop.distanceType);
+    refinementObjectSpace = new ObjectSpaceRepository<unsigned char, int>(dimension, typeid(uint8_t), dtype);
     break;
 #ifdef NGT_HALF_FLOAT
   case NGT::ObjectSpace::ObjectType::Float16 :
-    refinementObjectSpace = new ObjectSpaceRepository<float16, float>(dimension, typeid(float16), prop.distanceType);
+    refinementObjectSpace = new ObjectSpaceRepository<float16, float>(dimension, typeid(float16), dtype);
     break;
 #endif
 #ifdef NGT_BFLOAT
   case NGT::ObjectSpace::ObjectType::Bfloat16 :
-    refinementObjectSpace = new ObjectSpaceRepository<bfloat16, float>(dimension, typeid(bfloat16), prop.distanceType);
+    refinementObjectSpace = new ObjectSpaceRepository<bfloat16, float>(dimension, typeid(bfloat16), dtype);
     break;
 #endif
   default:
@@ -1852,7 +2383,7 @@ GraphAndTreeIndex::createIndexWithInsertionOrder(InsertionOrder &insertionOrder,
   CreateIndexThreadPool::OutputJobQueue &output = threads.getOutputJobQueue();
 
   BuildTimeController buildTimeController(*this, NeighborhoodGraph::property);
-
+  
   try {
     CreateIndexJob job;
     NGT::ObjectID id = 1;
@@ -1990,7 +2521,7 @@ GraphAndTreeIndex::createIndex(const vector<pair<NGT::Object*, size_t> > &object
 	}
 	{
 	  size_t size = NeighborhoodGraph::property.edgeSizeForCreation;
-	  sort(output.begin(), output.end());
+	  sort(output.begin(), output.end());	
 	  for (size_t idxi = 0; idxi < cnt; idxi++) {
 	    // add distances
 	    ObjectDistances &objs = *output[idxi].results;
@@ -2004,7 +2535,7 @@ GraphAndTreeIndex::createIndex(const vector<pair<NGT::Object*, size_t> > &object
 	      ObjectDistance	r;
 	      r.distance = GraphIndex::objectSpace->getComparator()(*output[idxi].object, *output[idxj].object);
 	      r.id = output[idxj].id;
-	      objs.push_back(r);
+	      objs.emplace_back(r);
 	    }
 	    std::sort(objs.begin(), objs.end());
 	    if (objs.size() > size) {
@@ -2017,7 +2548,7 @@ GraphAndTreeIndex::createIndex(const vector<pair<NGT::Object*, size_t> > &object
 	      ids[output[idxi].batchIdx].identical = true;
 	      ids[output[idxi].batchIdx].id = objs[0].id;
 	      ids[output[idxi].batchIdx].distance = objs[0].distance;
-	      output[idxi].id = 0;
+	      output[idxi].id = 0;	
 	    } else {
 	      assert(output[idxi].id == 0);
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
@@ -2074,7 +2605,7 @@ GraphAndTreeIndex::createIndex(const vector<pair<NGT::Object*, size_t> > &object
 	  }
 	  output.pop_front();
 	}
-
+	
 	count += cnt;
 	if (timerCount <= count) {
 	  timer.stop();
diff --git a/lib/NGT/Index.h b/lib/NGT/Index.h
index 257dce1..df73076 100644
--- a/lib/NGT/Index.h
+++ b/lib/NGT/Index.h
@@ -95,9 +95,10 @@ namespace NGT {
 #endif
 	prefetchOffset	= 0;
 	prefetchSize	= 0;
-#ifdef NGT_INNER_PRODUCT
-        maxMagnitude	= 0.0;
-#endif
+        maxMagnitude	= -1.0;
+	quantizationScale = 0.0;
+	quantizationOffset = 0.0;
+	clippingRate = 0.0;
 	nOfNeighborsForInsertionOrder = 0;
 	epsilonForInsertionOrder = 0.1;
       }
@@ -121,9 +122,10 @@ namespace NGT {
 	prefetchOffset	= -1;
 	prefetchSize	= -1;
 	accuracyTable	= "";
-#ifdef NGT_INNER_PRODUCT
         maxMagnitude	= -1;
-#endif
+	quantizationScale = -1.0;
+	quantizationOffset = -1.0;
+	clippingRate = -1.0;
 	nOfNeighborsForInsertionOrder = -1;
 	epsilonForInsertionOrder = -1;
       }
@@ -137,6 +139,7 @@ namespace NGT {
 #ifdef NGT_HALF_FLOAT
 	case ObjectSpace::ObjectType::Float16: p.set("ObjectType", "Float-2"); break;
 #endif
+	case ObjectSpace::ObjectType::Qsuint8: p.set("ObjectType", "QSUInteger-8B"); break;
 #ifdef NGT_BFLOAT
 	case ObjectSpace::ObjectType::Bfloat16: p.set("ObjectType", "Bfloat-2"); break;
 #endif
@@ -167,9 +170,7 @@ namespace NGT {
 	case DistanceType::DistanceTypeNormalizedAngle:		p.set("DistanceType", "NormalizedAngle"); break;
 	case DistanceType::DistanceTypeNormalizedCosine:	p.set("DistanceType", "NormalizedCosine"); break;
 	case DistanceType::DistanceTypeNormalizedL2:		p.set("DistanceType", "NormalizedL2"); break;
-#ifdef NGT_INNER_PRODUCT
 	case DistanceType::DistanceTypeInnerProduct:		p.set("DistanceType", "InnerProduct"); break;
-#endif
 	case DistanceType::DistanceTypePoincare:		p.set("DistanceType", "Poincare"); break;  // added by Nyapicom
 	case DistanceType::DistanceTypeLorentz:			p.set("DistanceType", "Lorentz"); break;  // added by Nyapicom
 	default : std::cerr << "Fatal error. Invalid distance type. " << distanceType << std::endl; abort();
@@ -199,9 +200,10 @@ namespace NGT {
 	p.set("PrefetchOffset", prefetchOffset);
 	p.set("PrefetchSize", prefetchSize);
 	p.set("AccuracyTable", accuracyTable);
-#ifdef NGT_INNER_PRODUCT
         p.set("MaxMagnitude", maxMagnitude);
-#endif
+        p.set("QuantizationScale", quantizationScale);
+        p.set("QuantizationOffset", quantizationOffset);
+	p.set("QuantizationClippingRate", clippingRate);
 	p.set("NumberOfNeighborsForInsertionOrder", nOfNeighborsForInsertionOrder);
 	p.set("EpsilonForInsertionOrder", epsilonForInsertionOrder);
       }
@@ -220,6 +222,8 @@ namespace NGT {
 	  } else if (it->second == "Float-2") {
 	    objectType = ObjectSpace::ObjectType::Float16;
 #endif
+	  } else if (it->second == "QSUInteger-8B") {
+	    objectType = ObjectSpace::ObjectType::Qsuint8;
 #ifdef NGT_BFLOAT
 	  } else if (it->second == "Bfloat-2") {
 	    objectType = ObjectSpace::ObjectType::Bfloat16;
@@ -282,10 +286,8 @@ namespace NGT {
 	    distanceType = DistanceType::DistanceTypeNormalizedCosine;
 	  } else if (it->second == "NormalizedL2") {
 	    distanceType = DistanceType::DistanceTypeNormalizedL2;
-#ifdef NGT_INNER_PRODUCT
 	  } else if (it->second == "InnerProduct") {
 	    distanceType = DistanceType::DistanceTypeInnerProduct;
-#endif
 	  } else {
 	    std::cerr << "Invalid Distance Type in the property. " << it->first << ":" << it->second << std::endl;
 	  }
@@ -347,9 +349,10 @@ namespace NGT {
 	if (it != p.end()) {
 	  searchType = it->second;
 	}
-#ifdef NGT_INNER_PRODUCT
 	maxMagnitude = p.getf("MaxMagnitude", maxMagnitude);
-#endif
+	quantizationScale = p.getf("QuantizationScale", quantizationScale);
+	quantizationOffset = p.getf("QuantizationOffset", quantizationOffset);
+	clippingRate = p.getf("QuantizationClippingRate", clippingRate);
 	nOfNeighborsForInsertionOrder = p.getl("NumberOfNeighborsForInsertionOrder", nOfNeighborsForInsertionOrder);
 	epsilonForInsertionOrder = p.getf("EpsilonForInsertionOrder", epsilonForInsertionOrder);
       }
@@ -373,9 +376,10 @@ namespace NGT {
       int		prefetchSize;
       std::string	accuracyTable;
       std::string	searchType;	// test
-#ifdef NGT_INNER_PRODUCT
       float		maxMagnitude;
-#endif
+      float		quantizationScale;
+      float		quantizationOffset;
+      float		clippingRate;
       int		nOfNeighborsForInsertionOrder;
       float		epsilonForInsertionOrder;
 #ifdef NGT_REFINEMENT
@@ -538,65 +542,33 @@ namespace NGT {
     static void createGraphAndTree(const std::string &database, NGT::Property &prop, bool redirect = false) { createGraphAndTree(database, prop, "", redirect); }
     static void createGraph(const std::string &database, NGT::Property &prop, const std::string &dataFile, size_t dataSize = 0, bool redirect = false);
     template<typename T> size_t insert(const std::vector<T> &object);
+    template<typename T> size_t insert(ObjectID id, const std::vector<T> &object);
     template<typename T> size_t append(const std::vector<T> &object);
     template<typename T> void update(ObjectID id, const std::vector<T> &object);
 #ifdef NGT_REFINEMENT
     template<typename T> size_t appendToRefinement(const std::vector<T> &object);
+    template<typename T> size_t insertToRefinement(const std::vector<T> &object);
     template<typename T> void updateToRefinement(ObjectID id, const std::vector<T> &object);
 #endif
-    static void append(const std::string &database, const std::string &dataFile, size_t threadSize, size_t dataSize);
-    static void append(const std::string &database, const float *data, size_t dataSize, size_t threadSize);
+    static void append(const std::string &index, const std::string &dataFile, size_t threadSize, size_t dataSize);
+    static void append(const std::string &index, const float *data, size_t dataSize, size_t threadSize);
+    static void appendFromRefinementObjectFile(const std::string &index);
+    void appendFromRefinementObjectFile();
+    void insertFromRefinementObjectFile();
+    static void appendFromTextObjectFile(const std::string &index, const std::string &data,
+					 size_t dataSize, bool append = true, bool refinement = false);
+    void appendFromTextObjectFile(const std::string &data, size_t dataSize, bool append = true, bool refinement = false);
+    static void appendFromBinaryObjectFile(const std::string &index, const std::string &data,
+					   size_t dataSize, bool append = true, bool refinement = false);
+    void appendFromBinaryObjectFile(const std::string &data, size_t dataSize, bool append = true, bool refinement = false);
     static void remove(const std::string &database, std::vector<ObjectID> &objects, bool force = false);
     static void exportIndex(const std::string &database, const std::string &file);
     static void importIndex(const std::string &database, const std::string &file);
     virtual void load(const std::string &ifile, size_t dataSize) { getIndex().load(ifile, dataSize); }
     virtual void append(const std::string &ifile, size_t dataSize) { getIndex().append(ifile, dataSize); }
-    virtual void append(const float *data, size_t dataSize) {
-      StdOstreamRedirector redirector(redirect);
-      redirector.begin();
-      try {
-	getIndex().append(data, dataSize);
-      } catch(Exception &err) {
-	redirector.end();
-	throw err;
-      }
-      redirector.end();
-    }
-    virtual void append(const double *data, size_t dataSize) {
-      StdOstreamRedirector redirector(redirect);
-      redirector.begin();
-      try {
-	getIndex().append(data, dataSize);
-      } catch(Exception &err) {
-	redirector.end();
-	throw err;
-      }
-      redirector.end();
-    }
-    virtual void append(const uint8_t *data, size_t dataSize) {
-      StdOstreamRedirector redirector(redirect);
-      redirector.begin();
-      try {
-	getIndex().append(data, dataSize);
-      } catch(Exception &err) {
-	redirector.end();
-	throw err;
-      }
-      redirector.end();
-    }
-#ifdef NGT_HALF_FLOAT
-    virtual void append(const float16 *data, size_t dataSize) {
-      StdOstreamRedirector redirector(redirect);
-      redirector.begin();
-      try {
-	getIndex().append(data, dataSize);
-      } catch(Exception &err) {
-	redirector.end();
-	throw err;
-      }
-      redirector.end();
-    }
-#endif
+    template<typename T> void appendWithPreprocessing(const T *data, size_t dataSize, 
+						      bool append = true, bool refinement = false);
+    template<typename T> void append(const T *data, size_t dataSize);
     virtual size_t getNumberOfObjects() { return getIndex().getNumberOfObjects(); }
     virtual size_t getNumberOfIndexedObjects() { return getIndex().getNumberOfIndexedObjects(); }
     virtual size_t getObjectRepositorySize() { return getIndex().getObjectRepositorySize(); }
@@ -633,7 +605,12 @@ namespace NGT {
     virtual void search(NGT::SearchQuery &sc) { getIndex().search(sc); }
     virtual void search(NGT::SearchContainer &sc, ObjectDistances &seeds) { getIndex().search(sc, seeds); }
     virtual void getSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, size_t n) { getIndex().getSeeds(sc, seeds, n); }
-    virtual void remove(ObjectID id, bool force = false) { getIndex().remove(id, force); }
+    // Removes the object from the refinement object space first, ignoring any exception
+    // raised there, and then removes it from the index.
+    virtual void remove(ObjectID id, bool force = false) {
+      try { getRefinementObjectSpace().remove(id); } catch(...) {}
+      getIndex().remove(id, force);
+    }
     virtual void exportIndex(const std::string &file) { getIndex().exportIndex(file); }
     virtual void importIndex(const std::string &file) { getIndex().importIndex(file); }
     virtual bool verify(std::vector<uint8_t> &status, bool info = false, char mode = '-') { return getIndex().verify(status, info, mode); }
@@ -665,6 +642,8 @@ namespace NGT {
     void disableLog() { redirect = true; }
 
     void extractInsertionOrder(InsertionOrder &insertionOrder);
+    void setQuantizationFromMaxMin(float max, float min);
+    void setQuantization(float scale, float offset);
     static void destroy(const std::string &path) {
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
       std::remove(std::string(path + "/grp").c_str());
@@ -924,42 +903,20 @@ namespace NGT {
     void destructObjectSpace() {
 #ifdef NGT_REFINEMENT
       if (refinementObjectSpace != 0) {
-	auto *os = (ObjectSpaceRepository<float, double>*)refinementObjectSpace;
 #ifndef NGT_SHARED_MEMORY_ALLOCATOR
-	os->deleteAll();
+	refinementObjectSpace->deleteAll();
 #endif
-	delete os;
+	delete refinementObjectSpace;
         refinementObjectSpace = 0;
       }
 #endif
-      if (objectSpace == 0) {
-	return;
-      }
-      if (property.objectType == NGT::ObjectSpace::ObjectType::Float) {
-	ObjectSpaceRepository<float, double> *os = (ObjectSpaceRepository<float, double>*)objectSpace;
-#ifndef NGT_SHARED_MEMORY_ALLOCATOR
-	os->deleteAll();
-#endif
-	delete os;
-      } else if (property.objectType == NGT::ObjectSpace::ObjectType::Uint8) {
-	ObjectSpaceRepository<unsigned char, int> *os = (ObjectSpaceRepository<unsigned char, int>*)objectSpace;
-#ifndef NGT_SHARED_MEMORY_ALLOCATOR
-	os->deleteAll();
-#endif
-	delete os;
-#ifdef NGT_HALF_FLOAT
-      } else if (property.objectType == NGT::ObjectSpace::ObjectType::Float16) {
-	ObjectSpaceRepository<float16, float> *os = (ObjectSpaceRepository<float16, float>*)objectSpace;
+      if (objectSpace != 0) {
 #ifndef NGT_SHARED_MEMORY_ALLOCATOR
-	os->deleteAll();
+	objectSpace->deleteAll();
 #endif
-	delete os;
-#endif
-      } else {
-	std::cerr << "Cannot find Object Type in the property. " << property.objectType << std::endl;
-	return;
+	delete objectSpace;
+	objectSpace = 0;
       }
-      objectSpace = 0;
     }
 
     virtual void load(const std::string &ifile, size_t dataSize = 0) {
@@ -1141,7 +1098,9 @@ namespace NGT {
 	  searchQuery.workingResult = std::move(sc.workingResult);
 	} else {
 	  size_t poffset = 12;
+#ifndef NGT_SHARED_MEMORY_ALLOCATOR
 	  size_t psize = 64;
+#endif
 	  auto size = sc.size;
 	  sc.size *= expansion;
 	  try {
@@ -1238,6 +1197,11 @@ namespace NGT {
       seedSize = seedSize > repositorySize ? repositorySize : seedSize;
       std::vector<ObjectID> deteted;
       size_t emptyCount = 0;
+#ifndef NGT_ENABLE_TIME_SEED_FOR_RANDOM
+      if (seeds.size() != 0) {
+	srand(seeds[0].id);
+      }
+#endif
       while (seedSize > seeds.size()) {
 	double random = ((double)rand() + 1.0) / ((double)RAND_MAX + 2.0);
 	size_t idx = floor(repositorySize * random) + 1;
@@ -1274,6 +1238,7 @@ namespace NGT {
       sc.size = NeighborhoodGraph::property.edgeSizeForCreation;
       sc.radius = FLT_MAX;
       sc.explorationCoefficient = NeighborhoodGraph::property.insertionRadiusCoefficient;
+      sc.insertion = true;
       try {
 	GraphIndex::search(sc);
       } catch(Exception &err) {
@@ -1311,9 +1276,7 @@ namespace NGT {
       }
     }
 
-    virtual void insert(
-			ObjectID id
-			) {
+    virtual void insert(ObjectID id) {
       ObjectRepository &fr = objectSpace->getRepository();
       if (fr[id] == 0) {
 	std::cerr << "NGTIndex::insert empty " << id << std::endl;
@@ -1910,20 +1873,21 @@ namespace NGT {
       so.size = 2;
       so.radius = 0.0;
       so.explorationCoefficient = 1.1;
+      so.insertion = true;
       ObjectDistances	seeds;
       seeds.push_back(ObjectDistance(id, 0.0));
       GraphIndex::search(so, seeds);
       if (results.size() == 0) {
-	if (!GraphIndex::objectSpace->isNormalizedDistance()) {
+	if (!GraphIndex::objectSpace->isNormalizedDistance() && !GraphIndex::objectSpace->isQintObjectType()) {
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
 	  GraphIndex::objectSpace->deleteObject(obj);
 #endif
 	  std::stringstream msg;
-	  msg << "Not found the specified id. ID=" << id;
+	  msg << "Not found the specified id. (1) ID=" << id;
 	  NGTThrowException(msg);
 	}
 	so.radius = FLT_MAX;
-	so.size = 10;
+	so.size = 50;
 	results.clear();
 	GraphIndex::search(so, seeds);
 	for (size_t i = 0; i < results.size(); i++) {
@@ -1952,7 +1916,7 @@ namespace NGT {
 	  GraphIndex::objectSpace->deleteObject(obj);
 #endif
 	  std::stringstream msg;
-	  msg << "Not found the specified id. ID=" << id;
+	  msg << "Not found the specified id. (2) ID=" << id;
 	  NGTThrowException(msg);
 	}
       }
@@ -1984,6 +1948,7 @@ namespace NGT {
       sc.radius = FLT_MAX;
       sc.explorationCoefficient = NeighborhoodGraph::property.insertionRadiusCoefficient;
       sc.useAllNodesInLeaf = true;
+      sc.insertion = true;
       try {
 	GraphAndTreeIndex::search(sc);
       } catch(Exception &err) {
@@ -2110,7 +2075,9 @@ namespace NGT {
       size_t seedSize = NeighborhoodGraph::property.seedSize == 0 ? sc.size : NeighborhoodGraph::property.seedSize;
       seedSize = seedSize > sc.size ? sc.size : seedSize;
       if (seeds.size() > seedSize) {
+#ifndef NGT_ENABLE_TIME_SEED_FOR_RANDOM
 	srand(tso.nodeID.getID());
+#endif
 	// to accelerate thinning data.
 	for (size_t i = seeds.size(); i > seedSize; i--) {
 	  double random = ((double)rand() + 1.0) / ((double)RAND_MAX + 2.0);
@@ -2146,7 +2113,9 @@ namespace NGT {
 	  searchQuery.workingResult = std::move(sc.workingResult);
 	} else {
 	  size_t poffset = 12;
+#ifndef NGT_SHARED_MEMORY_ALLOCATOR
 	  size_t psize = 64;
+#endif
 	  auto size = sc.size;
 	  sc.size *= expansion;
 	  try {
@@ -2283,6 +2252,7 @@ namespace NGT {
     }
   };
 
+
 } // namespace NGT
 
 template<typename T>
@@ -2293,32 +2263,28 @@ size_t NGT::Index::append(const std::vector<T> &object)
   if (repo.size() == 0) {
     repo.initialize();
   }
-
   auto *o = repo.allocateNormalizedPersistentObject(object);
   repo.push_back(dynamic_cast<PersistentObject*>(o));
   size_t oid = repo.size() - 1;
   return oid;
 }
 
-#ifdef NGT_REFINEMENT
 template<typename T>
-size_t NGT::Index::appendToRefinement(const std::vector<T> &object)
+size_t NGT::Index::insert(const std::vector<T> &object)
 {
-  auto &os = getRefinementObjectSpace();
+  auto &os = getObjectSpace();
   auto &repo = os.getRepository();
   if (repo.size() == 0) {
     repo.initialize();
   }
 
   auto *o = repo.allocateNormalizedPersistentObject(object);
-  repo.push_back(dynamic_cast<PersistentObject*>(o));
-  size_t oid = repo.size() - 1;
+  size_t oid = repo.insert(dynamic_cast<PersistentObject*>(o));
   return oid;
 }
-#endif
 
 template<typename T>
-size_t NGT::Index::insert(const std::vector<T> &object)
+size_t NGT::Index::insert(ObjectID id,  const std::vector<T> &object)
 {
   auto &os = getObjectSpace();
   auto &repo = os.getRepository();
@@ -2327,7 +2293,7 @@ size_t NGT::Index::insert(const std::vector<T> &object)
   }
 
   auto *o = repo.allocateNormalizedPersistentObject(object);
-  size_t oid = repo.insert(dynamic_cast<PersistentObject*>(o));
+  size_t oid = repo.insert(id, dynamic_cast<PersistentObject*>(o));
   return oid;
 }
 
@@ -2357,6 +2323,35 @@ template<typename T>
 }
 
 #ifdef NGT_REFINEMENT
+// Allocates a persistent object from the given vector, pushes it onto the end of the
+// refinement object repository and returns the last index of that repository.
+template<typename T>
+size_t NGT::Index::appendToRefinement(const std::vector<T> &object)
+{
+  auto &repository = getRefinementObjectSpace().getRepository();
+  if (repository.size() == 0) {
+    // An empty repository is initialized before its first use.
+    repository.initialize();
+  }
+  auto *allocated = repository.allocateNormalizedPersistentObject(object);
+  repository.push_back(dynamic_cast<PersistentObject*>(allocated));
+  return repository.size() - 1;
+}
+
+// Allocates a persistent object from the given vector and hands it to insert() of
+// the refinement object repository; the value returned by insert() is the result.
+template<typename T>
+size_t NGT::Index::insertToRefinement(const std::vector<T> &object)
+{
+  auto &repository = getRefinementObjectSpace().getRepository();
+  if (repository.size() == 0) {
+    // An empty repository is initialized before its first use.
+    repository.initialize();
+  }
+  auto *allocated = repository.allocateNormalizedPersistentObject(object);
+  return repository.insert(dynamic_cast<PersistentObject*>(allocated));
+}
+
 template<typename T>
   void NGT::Index::updateToRefinement(ObjectID id, const std::vector<T> &object)
 {
@@ -2382,3 +2377,150 @@ template<typename T>
   return;
 }
 #endif
+
+// Appends dataSize objects, stored back to back in data with prop.dimension elements
+// each. Runs up to three passes: squared magnitudes for inner product, the clipped
+// max/min range for Qint object types, and the append itself.
+template<typename T>
+  void NGT::Index::appendWithPreprocessing(const T *data, size_t dataSize, bool append, bool refinement) {
+  if (dataSize == 0) {
+    return;
+  }
+  NGT::Property prop;
+  getProperty(prop);
+  float maxMag = prop.maxMagnitude;
+  // A positive maximum magnitude already set in the property is never raised here.
+  bool maxMagSkip = false;
+  if (maxMag > 0.0) maxMagSkip = true;
+  std::vector<float> addedElement;
+  auto *obj = data;
+  size_t dim = prop.dimension;
+  if (append && prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) {
+    NGT::Timer timer;
+    timer.start();
+    size_t counter = 0;
+    for (size_t idx = 0; idx < dataSize; idx++, obj += dim) {
+      std::vector<float> object;
+      object.reserve(dim);
+      for (size_t dataidx = 0; dataidx < dim; dataidx++) {
+	object.push_back(obj[dataidx]);
+      }
+      double mag = 0.0;
+      for (auto &v : object) {
+	mag += static_cast<float>(v) * v;
+      }
+      if (!maxMagSkip && mag > maxMag) {
+	maxMag = mag;
+      }
+      addedElement.emplace_back(mag);
+      counter++;
+      if (counter % 2000000 == 0) {
+	timer.stop();
+	std::cerr << "processed " << static_cast<float>(counter) / 1000000.0 << "M objects."
+		  << " maxMag=" << maxMag << " time=" << timer << std::endl;
+	timer.restart();
+      }
+    }
+    timer.stop();
+    std::cerr << "time=" << timer << std::endl;
+    std::cerr << "maxMag=" << maxMag << std::endl;
+    std::cerr << "dataSize=" << dataSize << std::endl;
+    if (static_cast<NGT::GraphIndex&>(getIndex()).property.maxMagnitude <= 0.0 && maxMag > 0.0) {
+      static_cast<NGT::GraphIndex&>(getIndex()).property.maxMagnitude = maxMag;
+    }
+  }
+
+  // Keep the clippingSize largest and smallest element values; the heap tops give the range.
+  if (append && getObjectSpace().isQintObjectType() && prop.clippingRate >= 0.0) {
+    std::priority_queue<float> min;
+    std::priority_queue<float, std::vector<float>, std::greater<float>> max;
+    {
+      NGT::Timer timer;
+      timer.start();
+      auto clippingSize = static_cast<float>(dataSize * dim) * prop.clippingRate;
+      clippingSize = clippingSize == 0 ? 1 : clippingSize;
+      size_t counter = 0;
+      obj = data;
+      for (size_t idx = 0; idx < dataSize; idx++, obj += dim) {
+	std::vector<float> object;
+	object.reserve(dim);
+	for (size_t dataidx = 0; dataidx < dim; dataidx++) {
+	  object.push_back(obj[dataidx]);
+	}
+	if (getObjectSpace().isNormalizedDistance()) {
+	  ObjectSpace::normalize(object);
+	}
+	if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) {
+	  float v = maxMag - addedElement[counter];
+	  object.emplace_back(sqrt(v >= 0.0 ? v : 0.0));
+	}
+	for (auto &v : object) {
+	  if (max.size() < clippingSize) {
+	    max.push(v);
+	  } else if (max.top() <= v) {
+	    max.push(v);
+	    max.pop();
+	  }
+	  if (min.size() < clippingSize) {
+	    min.push(v);
+	  } else if (min.top() >= v) {
+	    min.push(v);
+	    min.pop();
+	  }
+	}
+	counter++;
+      }
+      std::cerr << "time=" << timer << std::endl;
+      if (counter != 0) {
+	std::cerr << "max:min=" << max.top() << ":" << min.top() << std::endl;
+	setQuantizationFromMaxMin(max.top(), min.top());
+      }
+    }
+  }
+  if (append || refinement) {
+    size_t counter = 0;
+    obj = data;
+    for (size_t idx = 0; idx < dataSize; idx++, obj += dim) {
+      std::vector<float> object;
+      object.reserve(dim);
+      for (size_t dataidx = 0; dataidx < dim; dataidx++) {
+	object.push_back(obj[dataidx]);
+      }
+#ifdef NGT_REFINEMENT
+      if (refinement) {
+	appendToRefinement(object);
+      }
+#endif
+      if (append) {
+	if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) {
+	  // Clamp as the quantization pass does: a magnitude above maxMag would give NaN.
+	  float v = maxMag - addedElement[counter];
+	  object.emplace_back(sqrt(v >= 0.0 ? v : 0.0));
+	}
+	NGT::Index::append(object);
+      }
+      counter++;
+    }
+  }
+}
+
+// Appends dataSize objects from data; inner-product and Qint indexes are routed through appendWithPreprocessing.
+template<typename T>
+void NGT::Index::append(const T *data, size_t dataSize) {
+  StdOstreamRedirector redirector(redirect);
+  redirector.begin();
+  try {
+    NGT::Property prop;
+    getProperty(prop);
+    if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct || getObjectSpace().isQintObjectType()) {
+      appendWithPreprocessing(data, dataSize);
+    } else {
+      static_cast<GraphIndex&>(getIndex()).append(data, dataSize);
+    }
+  } catch(...) {
+    // End the redirection for any exception type, then rethrow the original exception object.
+    redirector.end();
+    throw;
+  }
+  redirector.end();
+}
diff --git a/lib/NGT/NGTQ/HierarchicalKmeans.cpp b/lib/NGT/NGTQ/HierarchicalKmeans.cpp
index 444c8bc..4b16629 100644
--- a/lib/NGT/NGTQ/HierarchicalKmeans.cpp
+++ b/lib/NGT/NGTQ/HierarchicalKmeans.cpp
@@ -158,9 +158,6 @@ void QBG::HierarchicalKmeans::threeLayerClustering(std::string prefix, QBG::Inde
 	numOfThirdClusters = index.getQuantizer().property.globalCentroidLimit;
       }
     }
-    if (numOfThirdClusters != 0 && index.getQuantizer().property.globalCentroidLimit != 0 &&
-	numOfThirdClusters != index.getQuantizer().property.globalCentroidLimit) {
-    }
     auto &quantizer = static_cast<NGTQ::QuantizerInstance<uint8_t>&>(index.getQuantizer());
     QBGObjectList &objectList = quantizer.objectList;
     if (numOfObjects == 0) {
@@ -287,7 +284,7 @@ void QBG::HierarchicalKmeans::threeLayerClustering(std::string prefix, QBG::Inde
 	  if (thirdFlatClusters[idx].members.size() == 0) {
 	    std::cerr << "warning. found an empty cluster in thirdFlatClusters. " << idx << std::endl;
 	  } else {
-	    bqindex.push_back(idx1);
+	    bqindex.emplace_back(idx1);
 	  }
 	}
       }
@@ -586,6 +583,8 @@ void QBG::HierarchicalKmeans::clustering(std::string indexPath, std::string pref
   NGT::StdOstreamRedirector redirector(!verbose);
   redirector.begin();
 
+  QBG::Index::setupObjects(indexPath, 0, verbose);
+
   std::cerr << "The specified params=FC:" << numOfFirstClusters << ":FO:" << numOfFirstObjects
 	    << ",SC:" << numOfSecondClusters << ":SO:" << numOfSecondObjects
 	    << ",TC:" << numOfThirdClusters << ":TO:" << numOfThirdObjects << ",O:" << numOfObjects << std::endl;
@@ -637,5 +636,77 @@ void QBG::HierarchicalKmeans::clustering(std::string indexPath, std::string pref
   redirector.end();
 }
 
+void QBG::HierarchicalKmeans::assignAll(std::string indexPath, int64_t lowerBoundOfNoOfObjects, size_t noOfNearestNeighbors) {
+  // Assigns objects to the clusters loaded from the third-centroid file and saves the object-to-cluster lists.
+  std::cerr << "assignAll " << lowerBoundOfNoOfObjects << ":" << noOfNearestNeighbors << std::endl;
+  bool readOnly = false;
+  QBG::Index index(indexPath, readOnly);
+  if (index.getQuantizer().objectList.size() <= 1) {
+    NGTThrowException("No objects in the index.");
+  }
+  // A zero numOfObjects is replaced by objectList.size() - 1 below.
+  auto &quantizer = static_cast<NGTQ::QuantizerInstance<uint8_t>&>(index.getQuantizer());
+  QBGObjectList &objectList = quantizer.objectList;
+  if (numOfObjects == 0) {
+    numOfObjects = objectList.size() - 1;
+  }
+  auto &objectSpace = quantizer.globalCodebookIndex.getObjectSpace();
+  std::vector<NGT::Clustering::Cluster> thirdFlatClusters;
+  std::string prefix = indexPath + "/" + QBG::Index::getWorkspaceName();
+  prefix +="/" + QBG::Index::getHierarchicalClusteringPrefix();
+  NGT::Clustering::loadClusters(prefix + QBG::Index::getThirdCentroidSuffix(), thirdFlatClusters);
+  // First assignment pass; clusters with at most lowerBoundOfNoOfObjects members are dropped afterwards.
+  assignWithNGT(thirdFlatClusters, 1, numOfObjects, objectSpace, objectList, epsilonExplorationSize, expectedRecall, noOfNearestNeighbors);
+  size_t remove = 0;
+  size_t max = 0;
+  for (auto it = thirdFlatClusters.begin(); it != thirdFlatClusters.end();) {
+    if ((*it).members.size() > max) {
+      max = (*it).members.size();
+    }
+    if (static_cast<int64_t>((*it).members.size()) <= lowerBoundOfNoOfObjects) {
+      remove++;
+      (*it) = std::move(thirdFlatClusters.back());
+      thirdFlatClusters.pop_back();
+    } else {
+      ++it;
+    }
+  }
+  std::cerr << "max=" << max << " removed=" << remove << std::endl;
+  if (lowerBoundOfNoOfObjects > 0 && remove > 0) {
+    NGT::Clustering::clearMembers(thirdFlatClusters);
+    std::cerr << "the second assignWithNGT." << std::endl;
+    assignWithNGT(thirdFlatClusters, 1, numOfObjects, objectSpace, objectList, epsilonExplorationSize, expectedRecall, noOfNearestNeighbors);
+  }
+  // Build, per object, the list of positions of the clusters it is a member of, then save it.
+  {
+    std::vector<std::vector<uint32_t>> cindex(numOfObjects);
+    for (auto it = thirdFlatClusters.begin(); it != thirdFlatClusters.end(); ++it) {
+      size_t idx = distance(thirdFlatClusters.begin(), it);
+      if (lowerBoundOfNoOfObjects >= 0 && (*it).members.empty()) {
+	std::stringstream msg;
+	msg << "Fatal error! Found empty cluster. " << idx;
+	NGTThrowException(msg);
+      }
+      for (auto mit = (*it).members.begin(); mit != (*it).members.end(); ++mit) {
+	size_t vid = (*mit).vectorID;
+	cindex[vid].emplace_back(idx);
+      }
+    }
+    std::cerr << "save index... " << cindex.size() << std::endl;
+    NGT::Clustering::saveVectors(prefix + QBG::Index::getObjTo3rdSuffix(), cindex);
+  }
+  if (remove > 0) {
+    std::cerr << "found empty clusters. " << remove << std::endl;
+    NGT::Clustering::saveClusters(prefix + QBG::Index::getThirdCentroidSuffix(), thirdFlatClusters);
+    NGT::Clustering::saveClusters(prefix + QBG::Index::getSecondCentroidSuffix(), thirdFlatClusters);
+    std::vector<size_t> bqindex;
+    for (size_t idx = 0; idx < thirdFlatClusters.size(); idx++) {
+      bqindex.emplace_back(idx);
+    }
+    std::cerr << "save the 3rd to the 2nd index..." << std::endl;
+    NGT::Clustering::saveVector(prefix + QBG::Index::get3rdTo2ndSuffix(), bqindex);
+  }
+
+}
 #endif
 
diff --git a/lib/NGT/NGTQ/HierarchicalKmeans.h b/lib/NGT/NGTQ/HierarchicalKmeans.h
index f7f2284..93eccff 100644
--- a/lib/NGT/NGTQ/HierarchicalKmeans.h
+++ b/lib/NGT/NGTQ/HierarchicalKmeans.h
@@ -241,8 +241,8 @@ namespace QBG {
       size_t rootID = 0;
       HKInternalNode &root = static_cast<HKInternalNode&>(*nodes[rootID]);
       std::cerr << "first=" << root.children.size() << std::endl;
-      size_t secondCount = 0;
-      size_t thirdCount = 0;
+      size_t secondCount = 0;	
+      size_t thirdCount = 0;	
       size_t objectCount = 0;
       size_t leafID = 0;
       size_t qID = 0;
@@ -1020,7 +1020,7 @@ namespace QBG {
 	index.linearSearch(sc);
       }
 
-      float startEpsilon = 0.12;
+      float startEpsilon = 0.02;
       float epsilon;
       std::vector<float> recall(endID - beginID, 0.0);
       for (epsilon = startEpsilon; epsilon < 1.0; epsilon += 0.01) {
@@ -1073,7 +1073,8 @@ namespace QBG {
     static void assignWithNGT(std::vector<NGT::Clustering::Cluster> &clusters, size_t beginID, size_t endID,
 			      NGT::ObjectSpace &objectSpace, QBGObjectList &objectList,
 			      size_t epsilonExplorationSize = 100,
-			      float expectedRecall = 0.98) {
+			      float expectedRecall = 0.98,
+			      size_t noOfNearestNeighbors = 1) {
       if (beginID > endID) {
 	std::cerr << "assignWithNGT::Warning. beginID:" << beginID << " > endID:" << endID << std::endl;
 	return;
@@ -1122,7 +1123,7 @@ namespace QBG {
 	int numOfOutgoingEdges = 10;
 	int numOfIncomingEdges = 120;
 	int numOfQueries = 200;
-	int numOfResultantObjects = 20;
+	int numOfResultantObjects = noOfNearestNeighbors + 19;
 	graphOptimizer.set(numOfOutgoingEdges, numOfIncomingEdges, numOfQueries, numOfResultantObjects);
 	graphOptimizer.execute(anng, onng);
       }
@@ -1147,11 +1148,10 @@ namespace QBG {
 	abort();
       }
 #endif
-      std::vector<std::pair<uint32_t, float>> clusterIDs(endID - beginID);
       std::vector<std::pair<size_t, float>> distances(omp_get_max_threads(), std::make_pair(0, 0.0));
       size_t endOfEval = beginID + epsilonExplorationSize;
       endOfEval = endOfEval > endID ? endID : endOfEval;
-      size_t nOfObjects = 20;
+      size_t nOfObjects = noOfNearestNeighbors + 19;;
       NGT::Timer timer;
       timer.start();
       auto epsilon = optimizeEpsilon(index, beginID, endOfEval, nOfObjects,
@@ -1161,43 +1161,58 @@ namespace QBG {
       timer.start();
       size_t progressStep = (endID - beginID) / 20;;
       progressStep = progressStep < 20 ? 20 : progressStep;
+      size_t step = 1000000;
+      for (size_t bid = beginID; bid < endID; bid += step) {
+	std::vector<std::vector<std::pair<uint32_t, float>>> clusterIDs(step);
+	auto eid = std::min(endID, bid + step);
 #pragma omp parallel for
-      for (size_t id = beginID; id < endID; id++) {
-	std::vector<float> obj;
+	for (size_t id = bid; id < eid; id++) {
+	  std::vector<float> obj;
 #ifdef MULTIPLE_OBJECT_LISTS
-	objectList.get(omp_get_thread_num(), id, obj, &objectSpace);
+	  objectList.get(omp_get_thread_num(), id, obj, &objectSpace);
 #else
-	objectList.get(id, obj, &objectSpace);
+	  objectList.get(id, obj, &objectSpace);
 #endif
-	NGT::SearchQuery	sc(obj);
-	NGT::ObjectDistances	objects;
-	sc.setResults(&objects);
-	sc.setSize(nOfObjects);
-	sc.setEpsilon(epsilon);
-	index.search(sc);
-	clusterIDs[id - beginID] = make_pair(objects[0].id - 1, objects[0].distance);
-	auto threadID = omp_get_thread_num();
-	distances[threadID].first++;
-	distances[threadID].second += objects[0].distance;
-	{
-	  size_t cnt = 0;
-	  for (auto d : distances) {
-	    cnt += d.first;
+	  NGT::SearchQuery	sc(obj);
+	  NGT::ObjectDistances	objects;
+	  sc.setResults(&objects);
+	  sc.setSize(nOfObjects);
+	  sc.setEpsilon(epsilon);
+	  index.search(sc);
+	  if (clusterIDs[id - bid].capacity() < noOfNearestNeighbors) {
+	    clusterIDs[id - bid].reserve(noOfNearestNeighbors);
+	  }
+	  // Record up to noOfNearestNeighbors results. Bounding i by objects.size() stops the
+	  // loop when the search returned fewer results; each entry keeps its own distance.
+	  auto &assigned = clusterIDs[id - bid];
+	  for (size_t i = 0; i < objects.size() && assigned.size() < noOfNearestNeighbors; i++) {
+	    assigned.emplace_back(make_pair(objects[i].id - 1, objects[i].distance));
+	  }
+	  auto threadID = omp_get_thread_num();
+	  distances[threadID].first++;
+	  distances[threadID].second += objects[0].distance;
+	  {
+	    size_t cnt = 0;
+	    for (auto d : distances) {
+	      cnt += d.first;
+	    }
+	    if (cnt % progressStep == 0) {
+	      timer.stop();
+	      float progress = cnt * 100 / (endID - beginID);
+	      std::cerr << "assignWithNGT: " << cnt << " objects ("
+			<< progress  << "%) have been assigned. time=" << timer << std::endl;
+	      timer.restart();
+	    }
+	  }
+	}
+	std::cerr << "pushing..." << std::endl;
+	for (size_t id = bid; id < eid; id++) {
+	  for (auto &e : clusterIDs[id - bid]) {
+	    auto cid = e.first;
+	    auto cdistance = e.second;
+	    clusters[cid].members.emplace_back(NGT::Clustering::Entry(id - 1, cid, cdistance));
 	  }
-          if (cnt % progressStep == 0) {
-            timer.stop();
-            float progress = cnt * 100 / (endID - beginID);
-            std::cerr << "assignWithNGT: " << cnt << " objects ("
-                      << progress  << "%) have been assigned. time=" << timer << std::endl;
-            timer.restart();
-          }
 	}
-      }
-      std::cerr << "pushing..." << std::endl;
-      for (size_t id = beginID; id < endID; id++) {
-	auto cid = clusterIDs[id - beginID].first;
-	auto cdistance = clusterIDs[id - beginID].second;
-	clusters[cid].members.push_back(NGT::Clustering::Entry(id - 1, cid, cdistance));
       }
       {
 	size_t n = 0;
@@ -1286,6 +1301,8 @@ namespace QBG {
     void clustering(std::string indexPath, std::string prefix = "", std::string objectIDsFile = "");
 #endif
 
+    void assignAll(std::string indexPath, int64_t lowerBoundOfNoOfObjects, size_t noOfNearestNeighbors = 1);
+  
     size_t	maxSize;
     size_t	numOfObjects;
     size_t	numOfClusters;
diff --git a/lib/NGT/NGTQ/ObjectFile.h b/lib/NGT/NGTQ/ObjectFile.h
index 3d48949..c98af84 100644
--- a/lib/NGT/NGTQ/ObjectFile.h
+++ b/lib/NGT/NGTQ/ObjectFile.h
@@ -24,6 +24,7 @@
 #include <stdexcept>
 #include <cerrno>
 #include <cstring>
+#include "NGT/ArrayFile.h"
 
 namespace NGT {
   class ObjectSpace;
@@ -34,7 +35,8 @@ class ObjectFile : public ArrayFile<NGT::Object> {
   enum DataType {
     DataTypeUint8   = 0,
     DataTypeFloat   = 1,
-    DataTypeFloat16 = 2
+    DataTypeFloat16 = 2,
+    DataTypeNone = 99
   };
 
   ObjectFile():objectSpace(0) {}
@@ -52,6 +54,9 @@ class ObjectFile : public ArrayFile<NGT::Object> {
 
   bool open() {
     if (!ArrayFile<NGT::Object>::open(fileName)) {
+      std::stringstream msg;
+      msg << "ObjectFile::Cannot open the specified file. " << fileName;
+      NGTThrowException(msg);
       return false;
     }
     switch (dataType) {
@@ -70,7 +75,7 @@ class ObjectFile : public ArrayFile<NGT::Object> {
       break;
 #endif
     default:
-      stringstream msg;
+      std::stringstream msg;
       msg << "ObjectFile::Invalid Object Type in the property. " << dataType;
       NGTThrowException(msg);
       break;
@@ -129,7 +134,7 @@ class ObjectFile : public ArrayFile<NGT::Object> {
   template<typename T>
   bool get(const size_t id, std::vector<T> &data, NGT::ObjectSpace *os = 0) {
     if (objectSpace == 0) {
-      stringstream msg;
+      std::stringstream msg;
       msg << "ObjectFile::Fatal Error. objectSpace is not set." << std::endl;
       NGTThrowException(msg);
     }
@@ -169,13 +174,13 @@ class ObjectFile : public ArrayFile<NGT::Object> {
 
   void put(const size_t id, std::vector<float> &data, NGT::ObjectSpace *os = 0) {
     if (objectSpace == 0) {
-      stringstream msg;
+      std::stringstream msg;
       msg << "ObjectFile::Fatal Error. objectSpace is not set." << std::endl;
       NGTThrowException(msg);
     }
     if (objectSpace->getDimension() != data.size()) {
-      stringstream msg;
-      msg << "ObjectFile::Dimensions are inconsistency. " << objectSpace->getDimension() << ":" << data.size();
+      std::stringstream msg;
+      msg << "ObjectFile::Dimensions are inconsistent. " << objectSpace->getDimension() << ":" << data.size();
       NGTThrowException(msg);
     }
     NGT::Object *object = objectSpace->allocateObject();
@@ -304,13 +309,13 @@ class StaticObjectFileLoader {
       id = noOfObjects - 1;
     }
     size_t headerSize = sizeof(noOfObjects) + sizeof(noOfDimensions);
-    stream.seekg(id * sizeOfObject + headerSize, ios_base::beg);
+    stream.seekg(id * sizeOfObject + headerSize, std::ios_base::beg);
     counter = id;
     return;
   }
 
   std::vector<float> getObject() {
-    vector<float> object;
+    std::vector<float> object;
     if (isEmpty()) {
       return object;
     }
@@ -448,12 +453,12 @@ bool StaticObjectFile<TYPE>::open(const std::string &file, size_t pseudoDimensio
 
   bool ret = _readFileHead();
   if (_fileHead.noOfObjects != noOfObjects) {
-    stringstream msg;
+    std::stringstream msg;
     msg << "Invalid # of objects=" << _fileHead.noOfObjects << ":" << noOfObjects;
     NGTThrowException(msg);
   }
   if (_fileHead.noOfDimensions != noOfDimensions) {
-    stringstream msg;
+    std::stringstream msg;
     msg << "Invalid # of dimensions=" << _fileHead.noOfDimensions << ":" << noOfDimensions;
     NGTThrowException(msg);
   }
diff --git a/lib/NGT/NGTQ/Optimizer.cpp b/lib/NGT/NGTQ/Optimizer.cpp
index 3dc407c..7d9e5ce 100644
--- a/lib/NGT/NGTQ/Optimizer.cpp
+++ b/lib/NGT/NGTQ/Optimizer.cpp
@@ -593,4 +593,259 @@ void QBG::Optimizer::optimize(std::string invector, std::string ofile, std::stri
   }
 #endif
 }
+
+// Estimates the value range used for scalar quantization from a random sample of the stored
+// objects (rotated when the quantizer has a rotation) and saves it into the index property.
+//   indexPath    : path of the QBG index to open and update.
+//   clippingRate : rate of the sampled values tracked as extremes at each end of the range;
+//                  a negative value selects the rate stored in the index property.
+//   nOfObjects   : number of objects to sample; a negative value selects the number stored in
+//                  the index property, and 0 (or a too large value) means all objects.
+//   verbose      : when false, the NGT::StdOstreamRedirector is enabled while running.
+// Returns the number of sampled objects, or 0 when no valid sample size is available.
+// Throws NGT::Exception if the clipping rate is invalid or the index holds no objects.
+size_t QBG::Optimizer::extractScaleAndOffset(const std::string indexPath, float clippingRate,
+					     int32_t nOfObjects, bool verbose) {
+  NGT::StdOstreamRedirector redirector(!verbose);
+  redirector.begin();
+
+  int32_t n = 0;
+  try {
+    QBG::Index index(indexPath);
+    auto &quantizer = index.getQuantizer();
+    auto dim = quantizer.property.dimension;
+    // Negative arguments fall back to the values stored in the index property.
+    if (clippingRate < 0.0) {
+      clippingRate = quantizer.property.scalarQuantizationClippingRate;
+    }
+    if (nOfObjects < 0) {
+      nOfObjects = quantizer.property.scalarQuantizationNoOfSamples;
+    }
+    if (clippingRate < 0.0) {
+      std::stringstream msg;
+      msg << "Clipping rate is invalid. " << clippingRate;
+      NGTThrowException(msg);
+    }
+    if (nOfObjects < 0) {
+      // End the redirection explicitly, as every other exit path of this function does.
+      redirector.end();
+      return 0;
+    }
+    auto &objectList = quantizer.objectList;
+    if (objectList.size() <= 1) {
+      NGTThrowException("optimize: No objects");
+    }
+    // "min" is a max-heap holding the smallest values seen so far, so its top is the lower bound;
+    // "max" is a min-heap holding the largest values seen so far, so its top is the upper bound.
+    std::priority_queue<float> min;
+    std::priority_queue<float, std::vector<float>, std::greater<float>> max;
+    if (nOfObjects == 0 || nOfObjects >= static_cast<int32_t>(objectList.size())) {
+      nOfObjects = objectList.size() - 1;
+    }
+    // Number of extreme values tracked in each heap (at least one).
+    auto cutsize = static_cast<float>(nOfObjects * dim) * clippingRate;
+    cutsize = cutsize == 0 ? 1 : cutsize;
+    for (size_t id = 1; id < objectList.size(); id++) {
+      if (n == nOfObjects) break;
+      // Selection sampling: pick each object with probability (still needed) / (still available).
+      auto p = static_cast<double>(nOfObjects - n) / (objectList.size() - id);
+      double random = (rand() + 1.0) / (RAND_MAX + 2.0);
+      if (random > p) continue;
+      std::vector<float> object;
+      objectList.get(id, object, &quantizer.globalCodebookIndex.getObjectSpace());
+      if (!quantizer.rotation.empty()) {
+	quantizer.rotation.mul(object);
+      }
+      for (auto &v : object) {
+	if (max.size() < cutsize) {
+	  max.push(v);
+	} else if (max.top() <= v) {
+	  max.push(v);
+	  max.pop();
+	}
+	if (min.size() < cutsize) {
+	  min.push(v);
+	} else if (min.top() >= v) {
+	  min.push(v);
+	  min.pop();
+	}
+      }
+      n++;
+    }
+    if (max.empty() || min.empty()) {
+      // top() on an empty heap is undefined behavior.
+      NGTThrowException("optimize: No values were sampled");
+    }
+    index.setQuantizationFromMaxMin(max.top(), min.top());
+    index.saveProperty();
+  } catch(...) {
+    // End the redirection for any exception and rethrow the original object unchanged.
+    redirector.end();
+    throw;
+  }
+  redirector.end();
+  return n;
+}
+
+// Rewrites every stored object so that its last element (index genuineDimension - 1) becomes
+// sqrt(maxMag - mag), where mag is the sum of squares of the preceding elements and maxMag is
+// the largest mag: the positive value stored in the index property if there is one, otherwise
+// an estimate from a random sample, which is then saved into the property.
+//   indexPath  : path of the QBG index to open and update.
+//   nOfObjects : number of objects sampled to estimate maxMag; 0 (or a too large value) means all.
+//   verbose    : when false, the NGT::StdOstreamRedirector is enabled while running.
+// Returns the number of objects sampled for the estimate (0 when the stored maxMag is used).
+// Throws NGT::Exception if the index holds no objects or the dimensions are inconsistent.
+size_t QBG::Optimizer::convertObjectsFromInnerProductToL2(const std::string indexPath, size_t nOfObjects, bool verbose) {
+  NGT::StdOstreamRedirector redirector(!verbose);
+  redirector.begin();
+  NGT::Timer timer;
+  timer.start();
+
+  size_t count = 0;
+  try {
+    QBG::Index index(indexPath);
+    auto &quantizer = index.getQuantizer();
+    auto dim = quantizer.property.genuineDimension;
+    auto &objectList = quantizer.objectList;
+    if (objectList.size() <= 1) {
+      NGTThrowException("optimize: No objects");
+    }
+    if (dim != objectList.genuineDimension) {
+      std::stringstream msg;
+      msg << "Inner fatal error! The dimensions are inconsistent. "
+	  << dim << ":" << objectList.genuineDimension;
+      NGTThrowException(msg);
+    }
+    if (nOfObjects == 0 || nOfObjects >= objectList.size()) {
+      nOfObjects = objectList.size() - 1;
+    }
+    // A negative entry marks a magnitude that has not been computed yet. Every entry must start
+    // negative: entries stay untouched when the stored maxMagnitude is used and for the objects
+    // behind the point where the sampling loop stops.
+    std::vector<float> mags(objectList.size(), -1.0);
+
+    float maxMag;
+    if (quantizer.property.maxMagnitude > 0.0) {
+      maxMag = quantizer.property.maxMagnitude;
+    } else {
+      maxMag = 0.0;
+      for (size_t id = 1; id < objectList.size(); id++) {
+	if (count == nOfObjects) break;
+	// Selection sampling: pick each object with probability (still needed) / (still available).
+	auto p = static_cast<double>(nOfObjects - count) / (objectList.size() - id);
+	double random = (rand() + 1.0) / (RAND_MAX + 2.0);
+	if (random > p) continue;
+	std::vector<float> object;
+	objectList.get(id, object, &quantizer.globalCodebookIndex.getObjectSpace());
+	float mag = 0.0;
+	for (size_t i = 0; i < dim - 1; i++) {
+	  mag += object[i] * object[i];
+	}
+	if (mag > maxMag) {
+	  maxMag = mag;
+	}
+	mags[id] = mag;
+	count++;
+	if (count % 2000000 == 0) {
+	  timer.stop();
+	  std::cerr << "processed " << static_cast<float>(count) / 1000000.0 << "M objects."
+		    << " maxMag=" << maxMag << " time=" << timer << std::endl;
+	  timer.restart();
+	}
+      }
+      index.setMaxMagnitude(maxMag);
+      index.saveProperty();
+    }
+    for (size_t id = 1; id < objectList.size(); id++) {
+      std::vector<float> object;
+      objectList.get(id, object, &quantizer.globalCodebookIndex.getObjectSpace());
+      float mag = mags[id];
+      if (mag < 0.0) {
+	mag = 0.0;
+	for (size_t i = 0; i < dim - 1; i++) {
+	  mag += object[i] * object[i];
+	}
+      }
+      // maxMag may be a stored or sampled value, so an object can exceed it; clamp at zero
+      // instead of storing the NaN that sqrt() of a negative number would produce.
+      object[dim - 1] = mag < maxMag ? sqrt(maxMag - mag) : 0.0;
+      object.resize(dim);
+      objectList.put(id, object, &quantizer.globalCodebookIndex.getObjectSpace());
+      if (id % 2000000 == 0) {
+	timer.stop();
+	std::cerr << "processed " << static_cast<float>(id) / 1000000.0 << "M objects."
+		  << " maxMag=" << maxMag << " time=" << timer << std::endl;
+	timer.restart();
+      }
+    }
+  } catch(...) {
+    // End the redirection for any exception and rethrow the original object unchanged.
+    redirector.end();
+    throw;
+  }
+  redirector.end();
+  return count;
+}
+
+// Scales every stored object to unit Euclidean length over its genuine dimension and writes it
+// back to the object list.
+//   indexPath  : path of the QBG index to open and update.
+//   nOfObjects : currently unused; all objects are processed.
+//   verbose    : when false, the NGT::StdOstreamRedirector is enabled while running.
+// Returns the number of objects written back.
+// Throws NGT::Exception if the index holds no objects or the dimensions are inconsistent.
+size_t QBG::Optimizer::normalizeObjectsForCosine(const std::string indexPath, size_t nOfObjects, bool verbose) {
+  NGT::StdOstreamRedirector redirector(!verbose);
+  redirector.begin();
+  NGT::Timer timer;
+  timer.start();
+
+  size_t count = 0;
+  try {
+    QBG::Index index(indexPath);
+    auto &quantizer = index.getQuantizer();
+    auto dim = quantizer.property.genuineDimension;
+    auto &objectList = quantizer.objectList;
+    if (objectList.size() <= 1) {
+      NGTThrowException("optimize: No objects");
+    }
+    if (dim != objectList.genuineDimension) {
+      std::stringstream msg;
+      msg << "Inner fatal error! The dimensions are inconsistent. "
+	  << dim << ":" << objectList.genuineDimension;
+      NGTThrowException(msg);
+    }
+    for (size_t id = 1; id < objectList.size(); id++) {
+      std::vector<float> object;
+      objectList.get(id, object, &quantizer.globalCodebookIndex.getObjectSpace());
+      object.resize(dim);
+      float mag = 0.0;
+      for (size_t i = 0; i < dim; i++) {
+	mag += object[i] * object[i];
+      }
+      mag = sqrt(mag);
+      // An all-zero vector has no direction; leave it as is instead of dividing by zero.
+      if (mag > 0.0) {
+	for (size_t i = 0; i < dim; i++) {
+	  object[i] /= mag;
+	}
+      }
+      objectList.put(id, object, &quantizer.globalCodebookIndex.getObjectSpace());
+      count++;
+      if (id % 2000000 == 0) {
+	timer.stop();
+	std::cerr << "processed " << static_cast<float>(id) / 1000000.0 << "M objects." << std::endl;
+	timer.restart();
+      }
+    }
+  } catch(...) {
+    // End the redirection for any exception and rethrow the original object unchanged.
+    redirector.end();
+    throw;
+  }
+  redirector.end();
+  return count;
+}
+
 #endif
diff --git a/lib/NGT/NGTQ/Optimizer.h b/lib/NGT/NGTQ/Optimizer.h
index 51abad9..8d7e587 100644
--- a/lib/NGT/NGTQ/Optimizer.h
+++ b/lib/NGT/NGTQ/Optimizer.h
@@ -337,18 +337,18 @@ namespace QBG {
 	timer.start();
 	Matrix<float> optr;
 	optimizeRotation(
-			 iteration,
-			 vectors,
-			 xt,
-			 rs[ri],
-			 optr,
+			 iteration,		
+			 vectors,		
+			 xt,			
+			 rs[ri],		
+			 optr,			
 			 localClusters[ri],
 			 clusteringType,
 			 imode,
 			 numberOfClusters,
-			 numberOfSubvectors,
-			 subvectorSize,
-			 clusterIteration,
+			 numberOfSubvectors,	
+			 subvectorSize,		
+			 clusterIteration,	
 			 clusterSizeConstraint,
 			 clusterSizeConstraintCoefficient,
 			 convergenceLimitTimes,
@@ -366,6 +366,9 @@ namespace QBG {
     void optimizeWithinIndex(std::string indexPath);
     void optimize(std::string invector, std::string ofile, std::string global);
     void optimize(vector<vector<float>> &vectors, vector<vector<float>> &globalCentroid, Matrix<float> &r, vector<vector<NGT::Clustering::Cluster>> &localClusters, vector<double> &errors);
+    static size_t extractScaleAndOffset(const std::string indexPath, float clippingRate, int32_t nOfObjects, bool verbose);
+    static size_t convertObjectsFromInnerProductToL2(const std::string indexPath, size_t nOfObjects, bool verbose);
+    static size_t normalizeObjectsForCosine(const std::string indexPath, size_t nOfObjects, bool verbose);
 #endif
     NGT::Timer		timelimitTimer;
     size_t		subvectorSize;
@@ -373,10 +376,10 @@ namespace QBG {
     NGT::Clustering::ClusteringType	clusteringType;
     NGT::Clustering::InitializationMode	initMode;
     size_t		iteration;
-    size_t		clusterIteration;
+    size_t		clusterIteration;		
     bool		clusterSizeConstraint;
     float		clusterSizeConstraintCoefficient;
-    size_t		convergenceLimitTimes;
+    size_t		convergenceLimitTimes;		
     size_t		numberOfObjects;
     size_t		numberOfClusters;
     size_t		numberOfSubvectors;
diff --git a/lib/NGT/NGTQ/QbgCli.cpp b/lib/NGT/NGTQ/QbgCli.cpp
index d737275..a2239e9 100644
--- a/lib/NGT/NGTQ/QbgCli.cpp
+++ b/lib/NGT/NGTQ/QbgCli.cpp
@@ -37,12 +37,12 @@ class QbgCliBuildParameters : public QBG::BuildParameters {
   }
 
   void getCreationParameters() {
-    char objectType = args.getChar("o", 'f');
-    char distanceType = args.getChar("D", '2');
     creation.numOfObjects = args.getl("n", 0);
-
     creation.threadSize = args.getl("p", 24);
     creation.dimension = args.getl("d", 0);
+    auto clusterDataType = args.getString("C", "-");
+    creation.scalarQuantizationClippingRate = args.getf("r", 0.0);
+    creation.scalarQuantizationNoOfSamples = args.getl("V", 0);
 #ifdef NGTQ_QBG
     creation.numOfLocalClusters = args.getl("c", 16);
 #else
@@ -94,39 +94,84 @@ class QbgCliBuildParameters : public QBG::BuildParameters {
     creation.localInsertionRadiusCoefficient = creation.globalInsertionRadiusCoefficient;
 
 
-    switch (objectType) {
-    case 'f': creation.dataType = NGTQ::DataTypeFloat; break;
+    transform(clusterDataType.begin(), clusterDataType.end(), clusterDataType.begin(), ::tolower);
+    if (clusterDataType == "-" || clusterDataType == "pq4") {
+      creation.localClusterDataType = NGTQ::ClusterDataTypePQ4;
+    } else if (clusterDataType == "sqsu8" || clusterDataType == "sq8") {
+      creation.localClusterDataType = NGTQ::ClusterDataTypeSQSU8;
+    } else if (clusterDataType == "nq") {
+      creation.localClusterDataType = NGTQ::ClusterDataTypeNQ;
+    }
+
+    char objectType = args.getChar("o", 'f');
+    {
+      switch (objectType) {
+      case 'f': creation.dataType = NGTQ::DataTypeFloat; break;
 #ifdef NGT_HALF_FLOAT
-    case 'h': creation.dataType = NGTQ::DataTypeFloat16; break;
+      case 'h': creation.dataType = NGTQ::DataTypeFloat16; break;
 #endif
-    case 'c': creation.dataType = NGTQ::DataTypeUint8; break;
-    default:
-      std::stringstream msg;
-      msg << "Command::CreateParameters: Error: Invalid object type. " << objectType;
-      NGTThrowException(msg);
+      case 'c': creation.dataType = NGTQ::DataTypeUint8; break;
+      default:
+	std::stringstream msg;
+	msg << "Command::CreateParameters: Error: Invalid object type. " << objectType;
+	NGTThrowException(msg);
+      }
+    }
+    {
+      std::string globalObjectType = args.getString("K", "-");
+      std::string objectType;
+      if (globalObjectType == "-") {
+	if (clusterDataType == "-" || clusterDataType == "pq4") {
+	  objectType = "f";
+	} else {
+	  objectType = clusterDataType;
+	}
+      } else {
+	objectType = globalObjectType;
+      }
+      if (objectType == "f") {
+	creation.globalObjectType = NGT::ObjectSpace::ObjectType::Float;
+      } else if (objectType == "h") {
+	creation.globalObjectType = NGT::ObjectSpace::ObjectType::Float16;
+      } else if (objectType == "sqsu8" || objectType == "sq8") {
+	creation.globalObjectType = NGT::ObjectSpace::ObjectType::Qsuint8;
+      } else {
+	std::stringstream msg;
+	msg << "Command::CreateParameters: Error: Invalid global object type. " << objectType;
+	NGTThrowException(msg);
+      }
     }
 
-    switch (distanceType) {
-    case '2': creation.distanceType = NGTQ::DistanceType::DistanceTypeL2; break;
-    case '1': creation.distanceType = NGTQ::DistanceType::DistanceTypeL1; break;
-    case 'a': creation.distanceType = NGTQ::DistanceType::DistanceTypeAngle; break;
-    case 'C': creation.distanceType = NGTQ::DistanceType::DistanceTypeNormalizedCosine; break;
-    case 'E': creation.distanceType = NGTQ::DistanceType::DistanceTypeL2; break;
-#ifdef NGT_INNER_PRODUCT
-    case 'i': creation.distanceType = NGTQ::DistanceType::DistanceTypeInnerProduct; break;
-#endif
-    default:
-      std::stringstream msg;
-      msg << "Command::CreateParameters: Error: Invalid distance type. " << distanceType;
-      NGTThrowException(msg);
+    {
+      char distanceType = args.getChar("D", '2');
+      switch (distanceType) {
+      case '2': creation.distanceType = NGTQ::DistanceType::DistanceTypeL2; break;
+      case '1': creation.distanceType = NGTQ::DistanceType::DistanceTypeL1; break;
+      case 'a': creation.distanceType = NGTQ::DistanceType::DistanceTypeAngle; break;
+      case 'C': creation.distanceType = NGTQ::DistanceType::DistanceTypeNormalizedCosine; break;
+      case 'E': creation.distanceType = NGTQ::DistanceType::DistanceTypeL2; break;
+      case 'i': creation.distanceType = NGTQ::DistanceType::DistanceTypeInnerProduct; break;
+      default:
+	std::stringstream msg;
+	msg << "Command::CreateParameters: Error: Invalid distance type. " << distanceType;
+	NGTThrowException(msg);
+      }
     }
 #ifdef NGTQ_QBG
     creation.genuineDimension = creation.dimension;
-    creation.dimension = args.getl("P", creation.genuineDimension);
+    creation.dimension = args.getl("P", 0);
+    if (creation.dimension == 0) {
+      creation.dimension = ((creation.genuineDimension + 15) / 16) * 16;
+    }
     creation.dimensionOfSubvector = args.getl("Q", 0);
+    if (creation.numOfSubvectors == 0 &&
+	(creation.localClusterDataType == NGTQ::ClusterDataTypeSQSU8
+	 )) {
+      creation.numOfSubvectors = creation.dimension;
+    }
     {
-      char objectType = args.getChar("O", 'f');
-      switch (objectType) {
+      char oType = args.getChar("O", objectType);
+      switch (oType) {
       case 'f': creation.genuineDataType = ObjectFile::DataTypeFloat; break;
 #ifdef NGT_HALF_FLOAT
       case 'h': creation.genuineDataType = ObjectFile::DataTypeFloat16; break;
@@ -134,14 +179,24 @@ class QbgCliBuildParameters : public QBG::BuildParameters {
       case 'c': creation.genuineDataType = ObjectFile::DataTypeUint8; break;
       default:
 	std::stringstream msg;
-	msg << "Command::CreateParameters: Error: Invalid genuine object type. " << objectType;
+	msg << "Command::CreateParameters: Error: Invalid genuine data type. " << oType;
 	NGTThrowException(msg);
       }
     }
 #endif
     {
-      char objectListOnMemory = args.getChar("R", 'f');
-      creation.objectListOnMemory = (objectListOnMemory == 't' || objectListOnMemory == 'T');
+      char refinementDataType = args.getChar("R", '-');
+      switch (refinementDataType) {
+      case 'f': creation.refinementDataType = NGTQ::DataTypeFloat; break;
+#ifdef NGT_HALF_FLOAT
+      case 'h': creation.refinementDataType = NGTQ::DataTypeFloat16; break;
+#endif
+      case '-': creation.refinementDataType = NGTQ::DataTypeNone; break;
+      default:
+	std::stringstream msg;
+	msg << "Command::CreateParameters: Error: Invalid refinement data type. " << refinementDataType;
+	NGTThrowException(msg);
+      }
     }
 
   }
@@ -364,7 +419,6 @@ class QbgCliBuildParameters : public QBG::BuildParameters {
   NGT::Args &args;
 };
 
-
 class SearchParameters : public NGT::Command::SearchParameters {
 public:
   SearchParameters(NGT::Args &args): NGT::Command::SearchParameters(args, "0.02") {
@@ -394,9 +448,10 @@ QBG::CLI::buildQG(NGT::Args &args)
   try {
     indexPath = args.get("#1");
   } catch (...) {
-    cerr << "An index is not specified." << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
 
   size_t phase = args.getl("p", 0);
@@ -419,7 +474,7 @@ QBG::CLI::buildQG(NGT::Args &args)
     std::cerr << "optimizing..." << std::endl;
     optimizer.optimize(qgPath);
   }
-  bool verbose = false;
+  auto verbose = buildParameters.optimization.verbose;
   if (phase == 0 || phase == 2) {
     std::cerr << "building the inverted index..." << std::endl;
     QBG::Index::buildNGTQ(qgPath, verbose);
@@ -557,9 +612,10 @@ QBG::CLI::searchQG(NGT::Args &args) {
   try {
     indexPath = args.get("#1");
   } catch (...) {
-    cerr << "An index is not specified." << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;;
+    msg << usage << endl;
+    NGTThrowException(msg);
   }
 
   SearchParameters searchParameters(args);
@@ -580,14 +636,20 @@ QBG::CLI::searchQG(NGT::Args &args) {
   try {
     ::searchQG(index, searchParameters, std::cout);
   } catch (NGT::Exception &err) {
-    cerr << "qbg: Error " << err.what() << endl;
-    cerr << usage << endl;
+    std::stringstream msg;
+    msg << "qbg: Error " << err.what() << std::endl;
+    msg << usage << endl;
+    NGTThrowException(msg);
   } catch (std::exception &err) {
-    cerr << "qbg: Error " << err.what() << endl;
-    cerr << usage << endl;
+    std::stringstream msg;
+    msg << "qbg: Error " << err.what() << std::endl;
+    msg << usage << endl;
+    NGTThrowException(msg);
   } catch (...) {
-    cerr << "qbg: Error" << endl;
-    cerr << usage << endl;
+    std::stringstream msg;
+    msg << "qbg: Error ";
+    msg << usage << endl;
+    NGTThrowException(msg);
   }
 
 }
@@ -605,12 +667,14 @@ QBG::CLI::createQG(NGT::Args &args)
   try {
     indexPath = args.get("#1");
   } catch (...) {
-    cerr << "An index is not specified." << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << endl;
+    NGTThrowException(msg);
   }
   std::cerr << "creating..."  << std::endl;
   NGTQG::Index::create(indexPath, buildParameters);
+  std::cerr << "appending..."  << std::endl;
   NGTQG::Index::append(indexPath, buildParameters);
 }
 
@@ -622,14 +686,16 @@ QBG::CLI::appendQG(NGT::Args &args)
   try {
     indexPath = args.get("#1");
   } catch (...) {
-    cerr << "An index is not specified." << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << endl;
+    NGTThrowException(msg);
   }
   QBG::Index::appendFromObjectRepository(indexPath, indexPath + "/qg", false);
 }
 
 
+
 void
 QBG::CLI::info(NGT::Args &args)
 {
@@ -639,9 +705,10 @@ QBG::CLI::info(NGT::Args &args)
   try {
     indexPath = args.get("#1");
   } catch (...) {
-    cerr << "Index is not specified" << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << endl;
+    NGTThrowException(msg);
   }
 
   try {
@@ -663,8 +730,10 @@ QBG::CLI::info(NGT::Args &args)
       NGTQG::Index index(indexPath, 128, readOnly);
       std::cout << "The index type: QG" << std::endl;
     } catch (...) {
-      cerr << "qbg: The specified index is neither QBG nor QG." << std::endl;
-      cerr << usage << endl;
+      std::stringstream msg;
+      msg << "qbg: The specified index is neither QBG nor QG." << std::endl;
+      msg << usage << endl;
+      NGTThrowException(msg);
     }
   }
 
@@ -674,9 +743,11 @@ void
 QBG::CLI::create(NGT::Args &args)
 {
   const string usage = "Usage: qbg create "
-    " -d dimension [-o object-type (f:float|c:unsigned char)] [-D distance-function] [-n data-size] "
+    " -d dimension [-o object-type (object-list-data-type)] "
+    "[-O genuine-data-type (=object-type)] [-C cluster-data-type] [-K graph-data-type] "
+    "[-D distance-function] [-n data-size] "
     "[-p #-of-thread] [-R global-codebook-range] [-r local-codebook-range] "
-    "[-C global-codebook-size-limit] [-c local-codebook-size-limit] [-N local-division-no] "
+    "[-c local-codebook-size-limit] [-N local-division-no] "
     "[-T single-local-centroid (t|f)] [-e epsilon] [-i index-type (t:Tree|g:Graph)] "
     "[-M global-centroid-creation-mode (d|s)] [-L local-centroid-creation-mode (d|k|s)] "
     "[-s local-sample-coefficient] "
@@ -695,9 +766,10 @@ QBG::CLI::create(NGT::Args &args)
 	cerr << "rotation is " << rotationPath << "." << endl;
 	std::ifstream stream(rotationPath);
 	if (!stream) {
-	  std::cerr << "Cannot open the rotation. " << rotationPath << std::endl;
-	  cerr << usage << endl;
-	  return;
+	  std::stringstream msg;
+	  msg << "Cannot open the rotation. " << rotationPath << std::endl;
+	  msg << usage << std::endl;
+	  NGTThrowException(msg);
 	}
 	std::string line;
 	while (getline(stream, line)) {
@@ -720,8 +792,10 @@ QBG::CLI::create(NGT::Args &args)
 
     QBG::Index::create(indexPath, buildParameters, rotation, objectPath);
   } catch(NGT::Exception &err) {
-    std::cerr << err.what() << std::endl;
-    cerr << usage << endl;
+    std::stringstream msg;
+    msg << err.what() << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
 }
 
@@ -737,9 +811,10 @@ QBG::CLI::load(NGT::Args &args)
   try {
     indexPath = args.get("#1");
   } catch (...) {
-    std::cerr << "Not specified the index." << std::endl;
-    std::cerr << usage << std::endl;
-    return;
+     std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
 
   std::cerr << "qbg: loading the specified blobs..." << std::endl;
@@ -770,7 +845,6 @@ QBG::CLI::load(NGT::Args &args)
 void
 QBG::CLI::search(NGT::Args &args)
 {
-  
   const string usage = "Usage: qbg search [-i g|t|s] [-n result-size] [-e epsilon] [-m mode(r|l|c|a)] "
     "[-E edge-size] [-o output-mode] [-b result expansion(begin:end:[x]step)] "
     "index(input) query.tsv(input)";
@@ -779,18 +853,20 @@ QBG::CLI::search(NGT::Args &args)
   try {
     indexPath = args.get("#1");
   } catch (...) {
-    cerr << "Index is not specified" << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
 
   string query;
   try {
     query = args.get("#2");
   } catch (...) {
-    cerr << "Query is not specified" << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "Query is not specified" << endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
 
   bool verbose = args.getBool("v");
@@ -799,6 +875,22 @@ QBG::CLI::search(NGT::Args &args)
   float epsilon	= 0.1;
 
   char searchMode	= args.getChar("M", 'n');
+  NGTQ::DataType refinementDataType = NGTQ::DataTypeAny;
+  {
+    char refinement = args.getChar("R", '-');
+    switch (refinement) {
+    case 'f': refinementDataType = NGTQ::DataTypeFloat; break;
+#ifdef NGT_HALF_FLOAT
+    case 'h': refinementDataType = NGTQ::DataTypeFloat16; break;
+#endif
+    case 'x': refinementDataType = NGTQ::DataTypeNone; break;
+    case '-': refinementDataType = NGTQ::DataTypeAny; break;
+    default:
+      std::stringstream msg;
+      msg << "Command::CreateParameters: Error: Invalid refinement data type. " << refinement;
+      NGTThrowException(msg);
+    }
+  }
   if (args.getString("e", "none") == "-") {
     // linear search
     epsilon = FLT_MAX;
@@ -860,11 +952,14 @@ QBG::CLI::search(NGT::Args &args)
     cerr << "parameter=" << beginOfParameter << "->" << endOfParameter << "," << stepOfParameter << endl;
   }
 
-  QBG::Index index(indexPath, true, verbose);
+  auto prebuilt = true;
+  QBG::Index index(indexPath, prebuilt, verbose, refinementDataType);
   std::cerr << "qbg::The index is open." << std::endl;
   std::cerr << "  vmsize=" << NGT::Common::getProcessVmSizeStr() << std::endl;
   std::cerr << "  peak vmsize=" << NGT::Common::getProcessVmPeakStr() << std::endl;
-  auto dimension = index.getQuantizer().globalCodebookIndex.getObjectSpace().getDimension();
+  if (outputMode == 'e') {
+    std::cout << "# Beginning of Evaluation" << endl;
+  }
   try {
     for (size_t trial = 0; trial < nOfTrials; trial++) {
       ifstream		is(query);
@@ -872,7 +967,6 @@ QBG::CLI::search(NGT::Args &args)
 	cerr << "Cannot open the specified file. " << query << endl;
 	return;
       }
-      if (outputMode == 's') { cout << "# Beginning of Evaluation" << endl; }
       string line;
       double totalTime = 0;
       int queryCount = 0;
@@ -884,7 +978,6 @@ QBG::CLI::search(NGT::Args &args)
 	  linestream >> value;
 	  queryVector.push_back(value);
 	}
-	queryVector.resize(dimension);
 	queryCount++;
 	for (auto parameter = beginOfParameter;
 	     parameter <= endOfParameter;
@@ -897,16 +990,11 @@ QBG::CLI::search(NGT::Args &args)
 	  searchContainer.setResults(&objects);
 	  auto re = resultExpansion;
 	  if (re < 0.0) re = parameter;
-	  if (re >= 1.0) {
-	    searchContainer.setSize(static_cast<float>(size) * re);
-	    searchContainer.setExactResultSize(size);
-	  } else {
-	    searchContainer.setSize(size);
-	    searchContainer.setExactResultSize(0);
-	  }
+	  searchContainer.setRefinementExpansion(re);
 	  auto np = nOfProbes;
 	  if (np == 0) np = parameter;
 	  searchContainer.setNumOfProbes(np);
+	  searchContainer.setSize(size);
 	  searchContainer.setEpsilon(epsilon);
 	  searchContainer.setBlobEpsilon(blobEpsilon);
 	  searchContainer.setEdgeSize(edgeSize);
@@ -934,7 +1022,7 @@ QBG::CLI::search(NGT::Args &args)
 	    cout << "# Index Type=" << "----" << endl;
 	    cout << "# Size=" << size << endl;
 	    cout << "# Epsilon=" << epsilon << endl;
-	    cout << "# Result expansion=" << re << endl;
+	    cout << "# Refinement expansion=" << re << endl;
 	    cout << "# # of probes=" << np << endl;
 	    if (nOfProbes == 0) {
 	      cout << "# Factor=" << np << endl;
@@ -943,19 +1031,34 @@ QBG::CLI::search(NGT::Args &args)
 	    }
 	    cout << "# Distance Computation=" << index.getQuantizer().distanceComputationCount << endl;
 	    cout << "# Query Time (msec)=" << timer.time * 1000.0 << endl;
+	  } else if (outputMode == 't' || outputMode =='T') {
+	    cout << queryCount << "\t";
 	  } else {
 	    cout << "Query No." << queryCount << endl;
 	    cout << "Rank\tIN-ID\tID\tDistance" << endl;
 	  }
 
-	  for (size_t i = 0; i < objects.size(); i++) {
-	    cout << i + 1 << "\t" << objects[i].id << "\t";
-	    cout << objects[i].distance << endl;
+	  if (outputMode == 't' || outputMode =='T') {
+	    for (size_t i = 0; i < objects.size(); i++) {
+	      cout << objects[i].id;
+	      if (outputMode == 'T') {
+		cout << "\t" << objects[i].distance;
+	      }
+	      if (i + 1 != objects.size()) {
+		std::cout << "\t";
+	      }
+	    }
+	    std::cout << std::endl;
+	  } else {
+	    for (size_t i = 0; i < objects.size(); i++) {
+	      cout << i + 1 << "\t" << objects[i].id << "\t";
+	      cout << objects[i].distance << endl;
+	    }
 	  }
-
+	    
 	  if (outputMode == 'e' || outputMode == 'E') {
 	    cout << "# End of Search" << endl;
-	  } else {
+	  } else if (!(outputMode == 't' || outputMode =='T')) {
 	    cout << "Query Time= " << timer.time << " (sec), " << timer.time * 1000.0 << " (msec)" << endl;
 	  }
 	}
@@ -968,18 +1071,22 @@ QBG::CLI::search(NGT::Args &args)
 	cout << "# Average Query Time (msec)=" << queryTimes.back() << endl;
 	cout << "# Number of queries=" << queryCount << endl;
 	cout << "# End of Evaluation" << endl;
-      } else {
+      } else if (!(outputMode == 't' || outputMode =='T')) {
 	cout << "Average Query Time= " << totalTime / (double)queryCount  << " (sec), "
 	     << totalTime * 1000.0 / (double)queryCount << " (msec), ("
 	     << totalTime << "/" << queryCount << ")" << endl;
       }
     }
   } catch (NGT::Exception &err) {
-    cerr << "Error " << err.what() << endl;
-    cerr << usage << endl;
+    std::stringstream msg;
+    msg << "Error " << err.what() << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   } catch (...) {
-    cerr << "Error" << endl;
-    cerr << usage << endl;
+    std::stringstream msg;
+    msg << "Error" << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
   if (outputMode == 'e' || outputMode == 'E') {
     if (nOfTrials >= 1) {
@@ -993,6 +1100,227 @@ QBG::CLI::search(NGT::Args &args)
   index.close();
 }
 
+// Runs the "batch-search" command: reads one query vector per line from the
+// query file and searches all of them with a single searchInTwoSteps() call.
+// Errors are reported by throwing NGT::Exception.
+void
+QBG::CLI::batchSearch(NGT::Args &args)
+{
+  const string usage = "Usage: qbg batch-search [-i g|t|s] [-n result-size] [-e epsilon] [-m mode(r|l|c|a)] "
+    "[-E edge-size] [-o output-mode] [-b result expansion(begin:end:[x]step)] "
+    "index(input) query.tsv(input)";
+  args.parse("v");
+  string indexPath;
+  try {
+    indexPath = args.get("#1");
+  } catch (...) {
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
+  }
+
+  string query;
+  try {
+    query = args.get("#2");
+  } catch (...) {
+    std::stringstream msg;
+    msg << "Query is not specified" << endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
+  }
+
+  bool verbose = args.getBool("v");
+  size_t size		= args.getl("n", 20);
+  char outputMode	= args.getChar("o", '-');
+  float epsilon	= 0.1;
+
+  NGTQ::DataType refinementDataType = NGTQ::DataTypeAny;
+  {
+    char refinement = args.getChar("R", '-');
+    switch (refinement) {
+    case 'f': refinementDataType = NGTQ::DataTypeFloat; break;
+#ifdef NGT_HALF_FLOAT
+    case 'h': refinementDataType = NGTQ::DataTypeFloat16; break;
+#endif
+    case 'x': refinementDataType = NGTQ::DataTypeNone; break;
+    case '-': refinementDataType = NGTQ::DataTypeAny; break;
+    default:
+      std::stringstream msg;
+      msg << "Command::CreateParameters: Error: Invalid refinement data type. " << refinement;
+      NGTThrowException(msg);
+    }
+  }
+  if (args.getString("e", "none") == "-") {
+    // linear search
+    epsilon = FLT_MAX;
+  } else {
+    epsilon = args.getf("e", 0.1);
+  }
+  float blobEpsilon = args.getf("B", 0.05);
+  size_t edgeSize = args.getl("E", 0);
+  float cutback = args.getf("C", 0.0);
+  size_t explorationSize = args.getf("N", 256);
+  size_t nOfProbes = 0;
+  float resultExpansion = -1;
+  size_t nOfTrials = args.getl("T", 1);
+  if (nOfTrials != 1) {
+    std::cerr << "# of trials=" << nOfTrials << std::endl;
+  }
+
+  float beginOfParameter, endOfParameter, stepOfParameter;
+  //-/bool mulStep = false;
+  {
+    beginOfParameter = 0.0;
+    endOfParameter = 0.0;
+    stepOfParameter = 1;
+    vector<string> tokens;
+    if (args.getString("p", "-").find_first_of(':') == std::string::npos) {
+      resultExpansion = args.getf("p", 0.0);
+    }
+    if (args.getString("P", "-").find_first_of(':') == std::string::npos) {
+      nOfProbes = args.getl("P", 10);
+    }
+    if (resultExpansion < 0 && nOfProbes == 0) {
+      std::cerr << "Cannot specify both -p and -P as a fluctuating value. -P is prioritized." << std::endl;
+      NGT::Common::tokenize(args.getString("p", "-"), tokens, ":");
+      resultExpansion = NGT::Common::strtod(tokens[0]);
+      tokens.clear();
+    }
+    if (resultExpansion < 0) {
+      NGT::Common::tokenize(args.getString("p", "-"), tokens, ":");
+    } else if (nOfProbes == 0) {
+      NGT::Common::tokenize(args.getString("P", "-"), tokens, ":");
+    }
+    if (tokens.size() >= 2) {
+      beginOfParameter = NGT::Common::strtod(tokens[0]);
+      endOfParameter = beginOfParameter;
+      if (tokens.size() >= 2) { endOfParameter = NGT::Common::strtod(tokens[1]); }
+      if (tokens.size() >= 3) {
+	if (tokens[2][0] == 'x') {
+	  //-/mulStep = true;
+	  stepOfParameter = NGT::Common::strtod(tokens[2].substr(1));
+	} else {
+	  stepOfParameter = NGT::Common::strtod(tokens[2]);
+	}
+      }
+    }
+  }
+  if (debugLevel >= 1) {
+    cerr << "size=" << size << endl;
+    cerr << "parameter=" << beginOfParameter << "->" << endOfParameter << "," << stepOfParameter << endl;
+  }
+
+  QBG::Index index(indexPath, true, verbose, refinementDataType);
+  std::cerr << "qbg::The index is open." << std::endl;
+  std::cerr << "  vmsize=" << NGT::Common::getProcessVmSizeStr() << std::endl;
+  std::cerr << "  peak vmsize=" << NGT::Common::getProcessVmPeakStr() << std::endl;
+  if (outputMode == 'e') {
+    std::cout << "# Beginning of Evaluation" << endl;
+  }
+  try {
+    for (size_t trial = 0; trial < nOfTrials; trial++) {
+      auto pseudoDimension = index.getQuantizer().property.dimension;
+      std::ifstream		is(query);
+      if (!is) {
+	std::stringstream msg;
+	msg << "Cannot open the specified file. " << query;
+	NGTThrowException(msg);
+      }
+      string line;
+      std::vector<std::vector<float>> queries;
+      while(getline(is, line)) {
+	std::stringstream	linestream(line);
+	vector<float>	queryVector;
+	float value;
+	while (linestream >> value) {	// ends at end of line or at the first unparsable token
+	  queryVector.emplace_back(value);
+	}
+	if (!linestream.eof()) {	// stopped early, so a token is not a number
+	  std::stringstream msg;
+	  msg << "Invalid value in the query file. " << line;
+	  NGTThrowException(msg);
+	}
+	queryVector.resize(pseudoDimension);
+	queries.emplace_back(queryVector);
+      }
+      // Hold the flattened query buffer in a unique_ptr so that it is always released;
+      // writing "auto qs(new float[...])" instead would leak the buffer.
+      std::unique_ptr<float[]> qs(new float[queries.size() * pseudoDimension]);
+      for (size_t i = 0; i < queries.size(); i++) {
+	memcpy(&qs[i * pseudoDimension], queries[i].data(), pseudoDimension * sizeof(float));
+      }
+      std::cerr << "# of queries=" << queries.size() << std::endl;
+      QBG::BatchSearchContainer searchContainer;
+      searchContainer.setObjectVectors(&qs[0], queries.size(), pseudoDimension);
+      // Parameter ranges (begin:end:step) are not applied by batch search, so -p and -P must
+      // each resolve to one usable value; throw instead of calling abort() when they do not.
+      if (resultExpansion < 0.0) NGTThrowException("The result expansion (-p) must be a single non-negative value.");
+      searchContainer.setRefinementExpansion(resultExpansion);
+      if (nOfProbes == 0) NGTThrowException("The number of probes (-P) must be a single positive value.");
+      searchContainer.setNumOfProbes(nOfProbes);
+      searchContainer.setEpsilon(epsilon);
+      searchContainer.setBlobEpsilon(blobEpsilon);
+      searchContainer.setEdgeSize(edgeSize);
+      searchContainer.setCutback(cutback);
+      searchContainer.setGraphExplorationSize(explorationSize);
+      index.searchInTwoSteps(searchContainer);
+
+      auto &result = searchContainer.getBatchResult();
+      for (auto it = result.begin(); it != result.end(); ++it) {
+	auto no = distance(result.begin(), it) + 1;	// 1-based query number
+	if (outputMode == 't' || outputMode == 'T') {
+	  std::cout << no << "\t";
+	  for (auto r = (*it).begin(); r != (*it).end(); ++r) {
+	    std::cout << (*r).id;
+	    if (outputMode == 'T') {
+	      std::cout << "\t" << (*r).distance;
+	    }
+	    if (r + 1 != (*it).end()) {
+	      std::cout << "\t";
+	    }
+	  }
+	  std::cout << std::endl;
+	} else {
+	  if (outputMode == 'e') {
+	    std::cout << "# Query No.=" << no << endl;
+	    std::cout << "# Epsilon=" << epsilon << endl;
+	  } else {
+	    std::cout << "Query No." << no << std::endl;
+	    std::cout << "Rank\tIN-ID\tDistance" << std::endl;
+	  }
+	  for (auto r = (*it).begin(); r != (*it).end(); ++r) {
+	    auto rank = distance((*it).begin(), r);
+	    std::cout << rank + 1 << "\t" << (*r).id << "\t";
+	    std::cout << (*r).distance << std::endl;
+	  }
+	  if (outputMode == 'e') {
+	    std::cout << "# End of Search" << endl;
+	  }
+	}
+	if (outputMode == 'e') {
+	  std::cout << "# End of Query" << endl;
+	}
+      }
+    }
+  } catch (NGT::Exception &err) {
+    std::stringstream msg;
+    msg << "Error " << err.what() << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
+  } catch (...) {
+    std::stringstream msg;
+    msg << "Error" << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
+  }
+  if (outputMode == 'e' || outputMode == 'E') {
+    std::cout << "# End of Evaluation" << endl;
+    std::cout << "#   vmsize=" << NGT::Common::getProcessVmSizeStr() << std::endl;
+    std::cout << "#   peak vmsize=" << NGT::Common::getProcessVmPeakStr() << std::endl;
+  }
+  index.close();
+}
 
 void
 QBG::CLI::append(NGT::Args &args)
@@ -1003,16 +1331,16 @@ QBG::CLI::append(NGT::Args &args)
   try {
     indexPath = args.get("#1");
   } catch (...) {
-    cerr << "Index is not specified." << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
   string data;
   try {
     data = args.get("#2");
   } catch (...) {
-    cerr << usage << endl;
-    cerr << "Data is not specified." << endl;
+    std::cerr << "Data is not specified." << std::endl;
   }
 
   size_t dataSize = args.getl("n", 0);
@@ -1047,29 +1375,30 @@ QBG::CLI::append(NGT::Args &args)
 void
 QBG::CLI::insert(NGT::Args &args)
 {
-  const string usage = "Usage: qbg append [-n data-size] [-m b|e] [-v] index(output) data.tsv(input)";
+  const string usage = "Usage: qbg insert [-n data-size] [-m b|e] [-v] index(output) data.tsv(input)";
   args.parse("v");
   string indexPath;
   try {
     indexPath = args.get("#1");
   } catch (...) {
-    cerr << "Index is not specified." << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
   string data;
   try {
     data = args.get("#2");
   } catch (...) {
-    cerr << usage << endl;
-    cerr << "Data is not specified." << endl;
+    std::cerr  << "Data is not specified." << std::endl;
   }
 
   std::ifstream stream(data);
   if (!stream) {
-    std::cerr << "Cannot open the data file. " << data << std::endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "Cannot open the data file. " << data << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
   bool verbose = args.getBool("v");
 
@@ -1108,16 +1437,19 @@ QBG::CLI::remove(NGT::Args &args)
   try {
     indexPath = args.get("#1");
   } catch (...) {
-    cerr << "Index is not specified." << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
   uint32_t ids;
   try {
     ids = args.getl("#2", 0);
   } catch (...) {
-    cerr << usage << endl;
-    cerr << "Data is not specified." << endl;
+    std::stringstream msg;
+    msg << "Data is not specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
 
   auto verbose = args.getBool("v");
@@ -1133,6 +1465,56 @@ QBG::CLI::remove(NGT::Args &args)
 
 }
 
+// Runs the "expand-blob" command: parses the index path, the optional centroids
+// file and the search options, then delegates to QBG::Index::expandBlob().
+void
+QBG::CLI::expandBlob(NGT::Args &args)
+{
+  const string usage = "Usage: qbg expand-blob index [centroids-file]";
+  args.parse("v");
+  std::string indexPath;
+  try {
+    indexPath = args.get("#1");
+  } catch (...) {
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
+  }
+  std::string clusterCentroidsPath;
+  try {
+    clusterCentroidsPath = args.get("#2");
+  } catch (...) {}	// the centroids file is optional; the path stays empty when it is omitted
+
+  NGTQ::DataType refinementDataType = NGTQ::DataTypeAny;
+  {
+    char dataType = args.getChar("R", '-');
+    switch (dataType) {
+    case 'f': refinementDataType = NGTQ::DataTypeFloat; break;
+#ifdef NGT_HALF_FLOAT
+    case 'h': refinementDataType = NGTQ::DataTypeFloat16; break;
+#endif
+    case 'x': refinementDataType = NGTQ::DataTypeNone; break;
+    case '-': refinementDataType = NGTQ::DataTypeAny; break;
+    default:
+      std::stringstream msg;
+      msg << "Command::CreateParameters: Error: Invalid refinement data type. " << dataType;
+      NGTThrowException(msg);
+    }
+  }
+
+  auto verbose = args.getBool("v");
+
+  NGT::SearchContainer ngtSearchContainer;
+  ngtSearchContainer.setSize(50);
+  QBG::SearchContainer qbgSearchContainer;
+  qbgSearchContainer.setSize(args.getl("n", 20));
+  qbgSearchContainer.setBlobEpsilon(args.getf("b", 0.1));	// fractional option: getl() would truncate the 0.1 default to 0
+  float rate = args.getf("r", -1.0);
+  QBG::Index::expandBlob(indexPath, clusterCentroidsPath, ngtSearchContainer,
+			 qbgSearchContainer, rate, refinementDataType, verbose);
+}
+
 
 void
 QBG::CLI::buildIndex(NGT::Args &args)
@@ -1142,9 +1524,10 @@ QBG::CLI::buildIndex(NGT::Args &args)
   try {
     indexPath = args.get("#1");
   } catch (...) {
-    cerr << "An index is not specified." << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
   char mode = args.getChar("m", '-');
 
@@ -1154,7 +1537,7 @@ QBG::CLI::buildIndex(NGT::Args &args)
 
   std::vector<std::vector<float>> quantizerCodebook;
   std::vector<uint32_t> codebookIndex;
-  std::vector<uint32_t> objectIndex;
+  std::vector<std::vector<uint32_t>> objectIndex;
 
   if (mode == 'q' || mode == '-') {
     {
@@ -1167,9 +1550,10 @@ QBG::CLI::buildIndex(NGT::Args &args)
 	}
 	std::ifstream stream(codebookPath);
 	if (!stream) {
-	  std::cerr << "Cannot open the codebook. " << codebookPath << std::endl;
-	  cerr << usage << endl;
-	  return;
+	  std::stringstream msg;
+	  msg << "Cannot open the codebook. " << codebookPath << std::endl;
+	  msg << usage << std::endl;
+	  NGTThrowException(msg);
 	}
 	std::string line;
 	while (getline(stream, line)) {
@@ -1180,10 +1564,11 @@ QBG::CLI::buildIndex(NGT::Args &args)
 	    object.push_back(NGT::Common::strtof(token));
 	  }
 	  if (!quantizerCodebook.empty() && quantizerCodebook[0].size() != object.size()) {
-	    cerr << "The specified quantizer codebook is invalid. " << quantizerCodebook[0].size()
-		 << ":" << object.size() << ":" << quantizerCodebook.size() << ":" << line << endl;
-	    cerr << usage << endl;
-	    return;
+	    std::stringstream msg;
+	    msg << "The specified quantizer codebook is invalid. " << quantizerCodebook[0].size()
+		<< ":" << object.size() << ":" << quantizerCodebook.size() << ":" << line << std::endl;
+	    msg << usage << std::endl;
+	    NGTThrowException(msg);
 	  }
 	  if (!object.empty()) {
 	    quantizerCodebook.push_back(object);
@@ -1204,18 +1589,20 @@ QBG::CLI::buildIndex(NGT::Args &args)
 	cerr << "codebook index is " << codebookIndexPath << "." << endl;
 	std::ifstream stream(codebookIndexPath);
 	if (!stream) {
-	  std::cerr << "Cannot open the codebook index. " << codebookIndexPath << std::endl;
-	  cerr << usage << endl;
-	  return;
+	  std::stringstream msg;
+	  msg << "Cannot open the codebook index. " << codebookIndexPath << std::endl;
+	  msg << usage << std::endl;
+	  NGTThrowException(msg);
 	}
 	std::string line;
 	while (getline(stream, line)) {
 	  std::vector<std::string> tokens;
 	  NGT::Common::tokenize(line, tokens, " \t");
 	  if (tokens.size() != 1) {
-	    cerr << "The specified codebook index is invalid. " << line << std::endl;
-	    cerr << usage << endl;
-	    return;
+	    std::stringstream msg;
+	    msg << "The specified codebook index is invalid. " << line << std::endl;
+	    msg << usage << std::endl;
+	    NGTThrowException(msg);
 	  }
 	  codebookIndex.push_back(NGT::Common::strtol(tokens[0]));
 	}
@@ -1231,25 +1618,42 @@ QBG::CLI::buildIndex(NGT::Args &args)
 	} catch (...) {
 	  objectIndexPath = indexPath + "/ws/kmeans-cluster_index.tsv";
 	}
-	std::ifstream stream(objectIndexPath);
-	if (!stream) {
-	  std::cerr << "Cannot open the codebook index. " << objectIndexPath << std::endl;
-	  cerr << usage << endl;
-	  return;
+	{
+	  std::ifstream stream(objectIndexPath);
+	  if (!stream) {
+	    std::stringstream msg;
+	    msg << "Cannot open the codebook index. " << objectIndexPath;
+	    NGTThrowException(msg);
+	  }
+	  size_t nOfObjs = 0;
+	  std::string line;
+	  while (getline(stream, line)) nOfObjs++;
+	  objectIndex.resize(nOfObjs);
 	}
-	std::string line;
-	while (getline(stream, line)) {
-	  std::vector<std::string> tokens;
-	  NGT::Common::tokenize(line, tokens, " \t");
-	  std::vector<float> object;
-	  if (tokens.size() != 1) {
-	    cerr << "The specified codebook index is invalid. " << line << std::endl;
-	    cerr << usage << endl;
-	    return;
+	{
+	  std::ifstream stream(objectIndexPath);
+	  if (!stream) {
+	    std::stringstream msg;
+	    msg << "Cannot open the codebook index. " << objectIndexPath << std::endl;
+	    msg << usage << std::endl;
+	    NGTThrowException(msg);
+	  }
+	  {
+	    std::string line;
+	    size_t idx = 0;
+	    while (getline(stream, line)) {
+	      std::vector<std::string> tokens;
+	      NGT::Common::tokenize(line, tokens, " \t");
+	      if (tokens.size() > 0) {
+		objectIndex[idx].reserve(tokens.size());
+		for (auto &token : tokens) {
+		  objectIndex[idx].emplace_back(NGT::Common::strtol(token));
+		}
+	      }
+	      idx++;
+	    }
 	  }
-	  objectIndex.push_back(NGT::Common::strtol(tokens[0]));
 	}
-
       } catch (...) {}
     }
 
@@ -1284,13 +1688,29 @@ QBG::CLI::build(NGT::Args &args)
   try {
     indexPath = args.get("#1");
   } catch (...) {
-    cerr << "An index is not specified." << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
 
-
   std::string phaseString = args.getString("p", "1-3");
+  if (phaseString.substr(0, 10) == "assign-all") {
+    std::vector<std::string> tokens;
+    NGT::Common::tokenize(phaseString, tokens, ":");
+    int64_t lowerBoundOfObjects = 0;
+    size_t noOfNearestNeighbors = 1;
+    if (tokens.size() >= 2) {
+      lowerBoundOfObjects = NGT::Common::strtod(tokens[1]);
+    }
+    if (tokens.size() >= 3) {
+      noOfNearestNeighbors = NGT::Common::strtod(tokens[2]);
+    }
+    std::cerr << "qbg: assign-all" << std::endl;
+    HierarchicalKmeans hierarchicalKmeans(buildParameters);
+    hierarchicalKmeans.assignAll(indexPath, lowerBoundOfObjects, noOfNearestNeighbors);
+    return;
+  }
   bool phase[3];
   if (phaseString.empty()) {
     phase[0] = phase[1] = phase[2] = true;
@@ -1307,9 +1727,10 @@ QBG::CLI::build(NGT::Args &args)
     }
     if (tokens.size() >= 2) { endOfPhase = NGT::Common::strtod(tokens[1]) - 1;}
     if (tokens.size() >= 3 || tokens.size() == 0) {
-      cerr << "The specified phases are invalid! " << phaseString << endl;
-      cerr << usage << endl;
-      return;
+      std::stringstream msg;
+      msg << "The specified phases are invalid! " << phaseString << std::endl;
+      msg << usage << std::endl;
+      NGTThrowException(msg);
     }
     phase[0] = phase[1] = phase[2] = false;
     for (int p = beginOfPhase; p <= endOfPhase; p++) {
@@ -1377,14 +1798,17 @@ QBG::CLI::rebuild(NGT::Args &args)
   try {
     indexPath = args.get("#1");
   } catch (...) {
-    cerr << "An index is not specified." << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
   auto start = args.getl("s", 0);
   if (start == 0) {
-    std::cerr << "Start ID(-s) should be set. The ID is the smallest ID of the objects that are appended but not indexed." << std::endl;
-    std::cerr << usage << std::endl;
+    std::stringstream msg;
+    msg << "Start ID(-s) should be set. The ID is the smallest ID of the objects that are appended but not indexed." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
 
   NGT::Timer timer;
@@ -1416,9 +1840,10 @@ QBG::CLI::hierarchicalKmeans(NGT::Args &args)
   try {
     indexPath = args.get("#1");
   } catch (...) {
-    cerr << "Index is not specified" << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
 
   std::string prefix;
@@ -1459,9 +1884,10 @@ QBG::CLI::assign(NGT::Args &args)
   try {
     indexPath = args.get("#1");
   } catch (...) {
-    cerr << "Any index is not specified" << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
 
   std::string queryPath;
@@ -1469,9 +1895,10 @@ QBG::CLI::assign(NGT::Args &args)
   try {
     queryPath = args.get("#2");
   } catch (...) {
-    cerr << "Any query is not specified" << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "No query is specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
 
   auto epsilon = args.getf("e", 0.1);
@@ -1538,9 +1965,10 @@ QBG::CLI::extract(NGT::Args &args)
   try {
     objectPath = args.get("#1");
   } catch (...) {
-    std::cerr << "Object file is not specified." << std::endl;
-    std::cerr << usage << std::endl;
-    return;
+    std::stringstream msg;
+    msg << "Object file is not specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
 
   std::ostream *os;
@@ -1776,9 +2204,10 @@ QBG::CLI::optimize(NGT::Args &args)
   try {
     indexPath = args.get("#1");
   } catch(...) {
-    cerr << "qbg: index is not specified." << endl;
-    cerr << usage << endl;
-    return;
+    std::stringstream msg;
+    msg << "No index is specified." << std::endl;
+    msg << usage << std::endl;
+    NGTThrowException(msg);
   }
 
   string invector;
diff --git a/lib/NGT/NGTQ/QbgCli.h b/lib/NGT/NGTQ/QbgCli.h
index 8603e4b..ff12cf5 100644
--- a/lib/NGT/NGTQ/QbgCli.h
+++ b/lib/NGT/NGTQ/QbgCli.h
@@ -32,8 +32,10 @@ namespace QBG {
     void append(NGT::Args &args) { std::cerr << "not implemented." << std::endl; };
     void insert(NGT::Args &args) { std::cerr << "not implemented." << std::endl; };
     void remove(NGT::Args &args) { std::cerr << "not implemented." << std::endl; };
+    void expandBlob(NGT::Args &args) { std::cerr << "not implemented." << std::endl; };	// stub: only prints "not implemented."
     void buildIndex(NGT::Args &args) { std::cerr << "not implemented." << std::endl; };
     void hierarchicalKmeans(NGT::Args &args) { std::cerr << "not implemented." << std::endl; };
+    void batchSearch(NGT::Args &args) { std::cerr << "not implemented." << std::endl; };	// stub: only prints "not implemented."
     void search(NGT::Args &args) { std::cerr << "not implemented." << std::endl; };
     void assign(NGT::Args &args) { std::cerr << "not implemented." << std::endl; };
     void extract(NGT::Args &args) { std::cerr << "not implemented." << std::endl; };
@@ -53,8 +55,10 @@ namespace QBG {
     void append(NGT::Args &args);
     void insert(NGT::Args &args);
     void remove(NGT::Args &args);
+    void expandBlob(NGT::Args &args);
     void buildIndex(NGT::Args &args);
     void hierarchicalKmeans(NGT::Args &args);
+    void batchSearch(NGT::Args &args);
     void search(NGT::Args &args);
     void assign(NGT::Args &args);
     void extract(NGT::Args &args);
@@ -89,55 +93,60 @@ namespace QBG {
 
       debugLevel = args.getl("X", 0);
 
-      try {
-	if (debugLevel >= 1) {
-	  cerr << "ngt::command=" << command << endl;
-	}
-	if (command == "search") {
-	  search(args);
-	} else if (command == "create") {
-	  create(args);
-	} else if (command == "load") {
-	  load(args);
-	} else if (command == "append") {
-	  append(args);
-	} else if (command == "insert") {
-	  insert(args);
-	} else if (command == "remove") {
-	  remove(args);
-	} else if (command == "build-index") {
-	  buildIndex(args);
-	} else if (command == "kmeans") {
-	  hierarchicalKmeans(args);
-	} else if (command == "assign") {
-	  assign(args);
-	} else if (command == "extract") {
-	  extract(args);
-	} else if (command == "gt") {
-	  gt(args);
-	} else if (command == "gt-range") {
-	  gtRange(args);
-	} else if (command == "optimize") {
-	  optimize(args);
-	} else if (command == "build") {
-	  build(args);
-	} else if (command == "rebuild") {
-	  rebuild(args);
-	} else if (command == "create-qg") {
-	  createQG(args);
-	} else if (command == "build-qg") {
-	  buildQG(args);
-	} else if (command == "append-qg") {
-	  appendQG(args);
-	} else if (command == "search-qg") {
-	  searchQG(args);
-	} else if (command == "info") {
-	  info(args);
-	} else {
-	  cerr << "qbg: Illegal command. " << command << endl;
-	}
-      } catch(NGT::Exception &err) {
-	cerr << "qbg: Error: " << err.what() << endl;
+      if (debugLevel >= 1) {
+	cerr << "ngt::command=" << command << endl;
+      }
+      if (command == "search") {
+	search(args);
+      } else if (command == "batch-search") {
+	batchSearch(args);
+      } else if (command == "create") {
+	create(args);
+      } else if (command == "load") {
+	load(args);
+      } else if (command == "append") {
+	append(args);
+      } else if (command == "insert") {
+	insert(args);
+      } else if (command == "remove") {
+	remove(args);
+      } else if (command == "expand-blob") {
+	expandBlob(args);
+      } else if (command == "build-index") {
+	buildIndex(args);
+      } else if (command == "kmeans") {
+	hierarchicalKmeans(args);
+      } else if (command == "assign") {
+	assign(args);
+      } else if (command == "extract") {
+	extract(args);
+      } else if (command == "gt") {
+	gt(args);
+      } else if (command == "gt-range") {
+	gtRange(args);
+      } else if (command == "optimize") {
+	optimize(args);
+      } else if (command == "build") {
+	build(args);
+      } else if (command == "rebuild") {
+	rebuild(args);
+      } else if (command == "create-qg") {
+	createQG(args);
+      } else if (command == "build-qg") {
+	buildQG(args);
+      } else if (command == "append-qg") {
+	appendQG(args);
+      } else if (command == "search-qg") {
+	searchQG(args);
+      } else if (command == "info") {
+	info(args);
+      } else if (command == "-h") {
+	help();
+      } else {
+	help();
+	std::stringstream msg;
+	msg << "qbg: Illegal command. " << command << endl;
+	NGTThrowException(msg);
       }
     }
 
diff --git a/lib/NGT/NGTQ/QuantizedBlobGraph.h b/lib/NGT/NGTQ/QuantizedBlobGraph.h
index 6de5c5c..faba5e8 100644
--- a/lib/NGT/NGTQ/QuantizedBlobGraph.h
+++ b/lib/NGT/NGTQ/QuantizedBlobGraph.h
@@ -27,6 +27,7 @@
 #include	<thread>
 
 
+
 namespace QBG {
 
   class CreationParameters {
@@ -51,13 +52,17 @@ namespace QBG {
       localCentroidCreationMode		= NGTQ::CentroidCreationModeStatic;
       localIDByteSize			= 1;
       localClusteringSampleCoefficient	= 10;
-      objectListOnMemory		= false;
+      refinementDataType		= NGTQ::DataTypeNone;
+      localClusterDataType		= NGTQ::ClusterDataTypePQ4;
+      scalarQuantizationClippingRate	= 0.01;
+      scalarQuantizationNoOfSamples	= 0;
       
       globalEdgeSizeForCreation		= 10;
       globalEdgeSizeForSearch		= 40;
       globalIndexType			= NGT::Property::GraphAndTree;
       globalInsertionRadiusCoefficient	= 1.1;
       globalGraphType			= NGT::NeighborhoodGraph::GraphTypeANNG;
+      globalObjectType			= NGT::ObjectSpace::ObjectType::Float;
 
       localIndexType			= NGT::Property::GraphAndTree;
       localInsertionRadiusCoefficient	= 1.1;
@@ -89,12 +94,17 @@ namespace QBG {
       property.localCentroidCreationMode = creation.localCentroidCreationMode;
       property.localIDByteSize		= creation.localIDByteSize;
       property.localClusteringSampleCoefficient = creation.localClusteringSampleCoefficient;
-      property.objectListOnMemory	= creation.objectListOnMemory;
+      property.localClusterDataType	= creation.localClusterDataType;
+      property.scalarQuantizationClippingRate	= creation.scalarQuantizationClippingRate;
+      property.scalarQuantizationNoOfSamples	= creation.scalarQuantizationNoOfSamples;
+      property.refinementDataType	= creation.refinementDataType;
       globalProperty.edgeSizeForCreation = creation.globalEdgeSizeForCreation;
       globalProperty.edgeSizeForSearch	= creation.globalEdgeSizeForSearch;
       globalProperty.indexType		= creation.globalIndexType;
       globalProperty.insertionRadiusCoefficient = creation.globalInsertionRadiusCoefficient;
       globalProperty.graphType		= creation.globalGraphType;
+      globalProperty.objectType		= creation.globalObjectType;
+      globalProperty.seedSize		= 0;
       localProperty.indexType		= creation.localIndexType;
       localProperty.insertionRadiusCoefficient = creation.localInsertionRadiusCoefficient;
       localProperty.graphType		= creation.localGraphType;
@@ -127,13 +137,17 @@ namespace QBG {
     NGTQ::CentroidCreationMode		localCentroidCreationMode;
     size_t				localIDByteSize;
     size_t				localClusteringSampleCoefficient;
-    bool				objectListOnMemory;
-    
+    NGTQ::DataType			refinementDataType;
+    NGTQ::ClusterDataType		localClusterDataType;
+    float				scalarQuantizationClippingRate;
+    size_t				scalarQuantizationNoOfSamples;
+
     size_t				globalEdgeSizeForCreation;
     size_t				globalEdgeSizeForSearch;
     NGT::Property::IndexType		globalIndexType;
     float				globalInsertionRadiusCoefficient;
     NGT::Property::GraphType		globalGraphType;
+    NGT::ObjectSpace::ObjectType	globalObjectType;
 
     NGT::Property::IndexType		localIndexType;
     float				localInsertionRadiusCoefficient;
@@ -226,10 +240,10 @@ namespace QBG {
 
     float		timelimit;
     size_t		iteration;
-    size_t		clusterIteration;
+    size_t		clusterIteration;		
     bool		clusterSizeConstraint;
     float		clusterSizeConstraintCoefficient;
-    size_t		convergenceLimitTimes;
+    size_t		convergenceLimitTimes;		
     size_t		numOfObjects;
     size_t		numOfClusters;
     size_t		numOfSubvectors;
@@ -275,15 +289,14 @@ namespace QBG {
     bool				verbose;
   };
 
-
   class SearchContainer : public NGT::SearchContainer {
   public:
     SearchContainer(NGT::Object &q): NGT::SearchContainer(q),
       cutback(0.0), graphExplorationSize(50), exactResultSize(0),
-      blobExplorationCoefficient(0.0), numOfProbes(0) {}
+      blobExplorationCoefficient(1.0), numOfProbes(5), refinementExpansion(0.0) {}
     SearchContainer(): NGT::SearchContainer(*reinterpret_cast<NGT::Object*>(0)),
       cutback(0.0), graphExplorationSize(50), exactResultSize(0),
-      blobExplorationCoefficient(0.0), numOfProbes(0) {}
+      blobExplorationCoefficient(1.0), numOfProbes(5), refinementExpansion(0.0) {}
     SearchContainer(SearchContainer &sc, NGT::Object &q): NGT::SearchContainer(q) {
       QBG::SearchContainer::operator=(sc);
     }
@@ -294,6 +307,7 @@ namespace QBG {
       exactResultSize = sc.exactResultSize;
       blobExplorationCoefficient = sc.blobExplorationCoefficient;
       numOfProbes = sc.numOfProbes;
+      refinementExpansion = sc.refinementExpansion;
       objectVector = sc.objectVector;
       return *this;
     }
@@ -302,15 +316,38 @@ namespace QBG {
     void setExactResultSize(size_t esize) { exactResultSize = esize; }
     void setBlobEpsilon(float c) { blobExplorationCoefficient = c + 1.0; }
     void setNumOfProbes(size_t p) { numOfProbes = p; }
-    void setObjectVector(std::vector<float> &query) { objectVector = std::move(query); }
+    void setObjectVector(std::vector<float> &query) { objectVector = query; }
+    void setRefinementExpansion(float re) { refinementExpansion = re; }
     float       cutback;
     size_t      graphExplorationSize;
     size_t      exactResultSize;
     float       blobExplorationCoefficient;
     size_t	numOfProbes;
+    float	refinementExpansion;
     std::vector<float>	objectVector;
   };
 
+  // Search container for a batch of queries laid out back to back (query idx starts at objectVectors + dimension * idx).
+  class BatchSearchContainer : public SearchContainer {
+   public:
+    BatchSearchContainer(NGT::Object &q): SearchContainer(q), objectVectors(0), numOfQueries(0), dimension(0) {}
+    BatchSearchContainer(): objectVectors(0), numOfQueries(0), dimension(0) {}
+    BatchSearchContainer(SearchContainer &sc, NGT::Object &q): SearchContainer(sc, q), objectVectors(0), numOfQueries(0), dimension(0) {}
+    void setObjectVectors(void *qs, size_t nq, size_t dim) {
+      objectVectors = reinterpret_cast<float*>(qs);	// only the pointer is stored; the data is not copied
+      numOfQueries = nq;
+      dimension = dim;
+    }
+    void *getQuery(size_t idx) { return objectVectors + dimension * idx; }	// idx is not range-checked
+    NGT::ObjectDistances &getBatchResult(size_t i) { return batchResult[i]; }	// i is not range-checked
+    std::vector<NGT::ObjectDistances> &getBatchResult() { return batchResult; }
+
+    float *objectVectors;	// start of the query buffer set by setObjectVectors()
+    size_t numOfQueries;	// number of queries in the buffer
+    size_t dimension;		// number of floats per query; 0 until setObjectVectors() is called
+    std::vector<NGT::ObjectDistances> batchResult;	// one result list per query
+  };
+
   class QuantizedBlobGraphRepository : public NGTQG::QuantizedGraphRepository {
   public:
     QuantizedBlobGraphRepository(NGTQ::Index &quantizedIndex): NGTQG::QuantizedGraphRepository(quantizedIndex){
@@ -326,7 +363,7 @@ namespace QBG {
       NGT::Timer timer;
       timer.start();
       for (size_t gid = 1; gid < quantizedIndex.getInvertedIndexSize(); gid++) {
-	if (gid % 100000 == 0) {
+	if (gid % 10000 == 0) {
 	  timer.stop();
 	  std::cerr << "The number of processed blobs=" << gid << " VmSize=" <<  NGT::Common::getProcessVmSizeStr() << " Elapsed time=" << timer << std::endl;
 	  timer.restart();
@@ -345,40 +382,27 @@ namespace QBG {
 	    continue;
 	  }
 	}
-	NGTQ::QuantizedObjectProcessingStream quantizedStream(quantizedIndex.getQuantizer().divisionNo, invertedIndexObjects.size());
-	rearrange(invertedIndexObjects, (*this)[gid], quantizedStream);
+	rearrange(invertedIndexObjects, (*this)[gid], quantizedIndex.getQuantizer());
       }
 #endif
     }
 
-    static void rearrange(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects, NGTQG::QuantizedNode &rearrangedObjects, NGTQ::QuantizedObjectProcessingStream &quantizedStream) {
-	rearrangedObjects.clear();
-	rearrangedObjects.ids.reserve(invertedIndexObjects.size());
-	for (size_t oidx = 0; oidx < invertedIndexObjects.size(); oidx++) {
-	  rearrangedObjects.ids.push_back(invertedIndexObjects[oidx].id);
-	  for (size_t idx = 0; idx < invertedIndexObjects.numOfSubvectors; idx++) {
-#ifdef NGTQ_UINT8_LUT
-#ifdef NGTQ_SIMD_BLOCK_SIZE
-            size_t dataNo = oidx;
-#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
-	    abort();
-#else
-	    quantizedStream.arrangeQuantizedObject(dataNo, idx, invertedIndexObjects[oidx].localID[idx] - 1);
-#endif
-#else
-	    objectData[idx * noobjs + dataNo] = invertedIndexObjects[oidx].localID[idx] - 1;
-#endif
-#else
-	    objectData[idx * noobjs + dataNo] = invertedIndexObjects[oidx].localID[idx];
-#endif
-	  }
-	}
+    // Sets rearrangedObjects.subspaceID and .objects from invertedIndexObjects via the quantizer's distance object; .ids is not touched.
+    static void rearrangeObjects(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects, NGTQG::QuantizedNode &rearrangedObjects, NGTQ::Quantizer &quantizer) {
+      rearrangedObjects.subspaceID = invertedIndexObjects.subspaceID;
+      auto &quantizedObjectDistance = quantizer.getQuantizedObjectDistance();
+      rearrangedObjects.objects = quantizedObjectDistance.generateRearrangedObjects(invertedIndexObjects);
+    }
 
+    static void rearrangeObjects(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects, NGTQG::QuantizedNode &rearrangedObjects) {  // quantizer-free overload: arranges the entries in a QuantizedObjectProcessingStream and stores its compressIntoUint4() result; .ids is not touched
+	NGTQ::QuantizedObjectProcessingStream quantizedStream(invertedIndexObjects.numOfSubvectors, invertedIndexObjects.size());
+	quantizedStream.arrange(invertedIndexObjects);
 	rearrangedObjects.subspaceID = invertedIndexObjects.subspaceID;
 	rearrangedObjects.objects = quantizedStream.compressIntoUint4();
     }
 
-    static void rearrange(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects, NGTQG::QuantizedNode &rearrangedObjects) {
+    //  static void rearrange(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects, NGTQG::QuantizedNode &rearrangedObjects) {
+    static void rearrange(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects, NGTQG::QuantizedNode &rearrangedObjects, NGTQ::Quantizer &quantizer) {    
 #if defined(NGT_SHARED_MEMORY_ALLOCATOR)
       std::cerr << "construct: Not implemented" << std::endl;
       abort();
@@ -386,17 +410,19 @@ namespace QBG {
       if (invertedIndexObjects.numOfSubvectors == 0) {
 	NGTThrowException("# of subvectors is zero.");
       }
-      
-      //(*this).resize(quantizedIndex.getInvertedIndexSize());
       NGT::Timer timer;
       timer.start();
       {
+	rearrangedObjects.clear();
+	rearrangedObjects.ids.reserve(invertedIndexObjects.size());
+	for (size_t oidx = 0; oidx < invertedIndexObjects.size(); oidx++) {
+	  rearrangedObjects.ids.emplace_back(invertedIndexObjects[oidx].id);
+	}
 	//NGTQ::InvertedIndexEntry<uint16_t> invertedIndexObjects(numOfSubspaces);
 	//quantizedIndex.getQuantizer().extractInvertedIndexObject(invertedIndexObjects, gid);
 	//quantizedIndex.getQuantizer().eraseInvertedIndexObject(gid);
-	NGTQ::QuantizedObjectProcessingStream quantizedStream(invertedIndexObjects.numOfSubvectors, invertedIndexObjects.size());
-
-	rearrange(invertedIndexObjects, rearrangedObjects, quantizedStream);
+	//rearrangeFloatObjects(invertedIndexObjects, rearrangedObjects, quantizer);
+	rearrangeObjects(invertedIndexObjects, rearrangedObjects, quantizer);
       }
 #endif
     }
@@ -404,12 +430,13 @@ namespace QBG {
     static void rearrange(NGTQ::QuantizedObjectSet &quantizedObjects, NGTQG::QuantizedNode &rearrangedObjects) {
       NGTQ::InvertedIndexEntry<uint16_t> iie;
       iie.set(quantizedObjects);
-      rearrange(iie, rearrangedObjects);
+      rearrange(iie, rearrangedObjects, *reinterpret_cast<NGTQ::Quantizer*>(0));  // NOTE(review): this binds a reference to a null Quantizer, and the callee goes on to call quantizer.getQuantizedObjectDistance() on it -- confirm this overload is unreachable or pass a real quantizer
     }
 
     void extractRemovedIdSet(size_t objectListSize, std::vector<uint32_t> &removedIDs) {
       std::vector<bool> exist(objectListSize);
       size_t count = 0;
+      size_t duplicatedCount = 0;
       for (auto &blob : *this) {
 	for (auto id : blob.ids) {
 	  if (id >= exist.size()) {
@@ -418,13 +445,19 @@ namespace QBG {
 	    NGTThrowException(msg);
 	  }
 	  if (exist.at(id)) {
-	    std::cerr << "Warning: the object is duplicated. " << id << std::endl;
+	    if (duplicatedCount == 0) {
+	      std::cerr << "Warning: the object is duplicated. " << id << std::endl;
+	    }
+	    duplicatedCount++;
 	  } else {
 	    count++;
 	    exist.at(id) = true;
 	  }
 	}
       }
+      if (duplicatedCount > 0) {
+	std::cerr << "Warning: # of duplicated objects is " << duplicatedCount << "." << std::endl;
+      }
       {
 	removedIDs.clear();
 	removedIDs.reserve(objectListSize - count);
@@ -443,8 +476,9 @@ namespace QBG {
   
   class Index : public NGTQ::Index {
   public:
-  Index(const std::string &indexPath, bool prebuilt = false, bool verbose = false) :
-    NGTQ::Index(indexPath, prebuilt), path(indexPath), quantizedBlobGraph(*this) {
+    Index(const std::string &indexPath, bool prebuilt = false, bool verbose = false,
+	  NGTQ::DataType refinementDataType = NGTQ::DataTypeAny) :
+      NGTQ::Index(indexPath, prebuilt, refinementDataType), path(indexPath), quantizedBlobGraph(*this) {
       searchable = false;
       NGT::StdOstreamRedirector redirector(!verbose);
       redirector.begin();
@@ -562,9 +596,19 @@ namespace QBG {
 	std::vector<float>	object;
 	NGT::Common::extractVector(line, " ,\t", object);
 	if (object.empty()) {
-	  cerr << "An empty line or invalid value: " << line << endl;
+	  cerr << "Empty line or invalid value: " << line << endl;
 	  continue;
 	}
+	if ((quantizer.property.distanceType == NGTQ::DistanceType::DistanceTypeInnerProduct) &&
+	    (object.size() + 1 == quantizer.objectList.genuineDimension)) {
+	  object.emplace_back(0);
+	}
+	if (object.size() != quantizer.objectList.genuineDimension) {
+	  std::stringstream msg;
+	  msg << "The dimension of the specified object is inconsistent with the dimension of the index. " 
+	      << object.size() << ":" << quantizer.objectList.genuineDimension;
+	  NGTThrowException(msg);
+	}
 	index.insert(idx, object);
 
 	if (count % 100000 == 0) {
@@ -596,43 +640,9 @@ namespace QBG {
       NGT::Common::tokenize(data, tokens, ".");
       if (tokens.size() < 2) {
 	std::stringstream msg;
-	msg << "Invalid file name format";
+	msg << "Invalid file name format. " << data;
 	NGTThrowException(msg);
       }
-#ifdef NGT_INNER_PRODUCT
-      double maxMag = 0.0;
-      if (index.getQuantizer().property.distanceType == NGTQ::DistanceType::DistanceTypeInnerProduct) {
-	std::cerr << "Inner product." << std::endl;
-        NGT::Timer timer;
-        timer.start();
-        StaticObjectFileLoader loader(data, tokens[tokens.size() - 1]);
-        size_t count = 0;
-        while (!loader.isEmpty()) {
-          if (dataSize > 0 && count > dataSize) {
-            break;
-          }
-          auto object = loader.getObject();
-          double mag = 0.0;
-          for (auto &v : object) {
-            //std::cerr << v << ":" << mag << std::endl;
-            mag += v * v;
-          }
-          if (mag > maxMag) {
-            maxMag = mag;
-          }
-          count++;
-          if (count % 2000000 == 0) {
-            timer.stop();
-            std::cerr << "processed " << static_cast<float>(count) / 1000000.0 << "M objects."
-                      << " maxMag=" << maxMag << " time=" << timer << std::endl;
-            timer.restart();
-          }
-        }
-        timer.stop();
-        std::cerr << "time=" << timer << std::endl;
-      }
-      std::cerr << "final maxMag=" << maxMag << std::endl;
-#endif
       auto &quantizer = index.getQuantizer();
       StaticObjectFileLoader loader(data, tokens[tokens.size() - 1]);
       size_t idx = quantizer.objectList.size() == 0 ? 0 : quantizer.objectList.size() - 1;
@@ -644,16 +654,16 @@ namespace QBG {
 	  break;
 	}
 	auto object = loader.getObject();
-#ifdef NGT_INNER_PRODUCT
-	if (index.getQuantizer().property.distanceType == NGTQ::DistanceType::DistanceTypeInnerProduct) {
-	  double mag = 0.0;
-	  for (auto &v : object) {
-	    //std::cerr << v << ":" << mag << std::endl;
-	    mag += v * v;
-	  }
-	  object.push_back(sqrt(maxMag - mag));
+	if ((quantizer.property.distanceType == NGTQ::DistanceType::DistanceTypeInnerProduct) &&
+	    (object.size() + 1 == quantizer.objectList.genuineDimension)) {
+	  object.emplace_back(0);
+	}
+	if (object.size() != quantizer.objectList.genuineDimension) {
+	  std::stringstream msg;
+	  msg << "The dimension of the specified object is inconsistent with the dimension of the index. " 
+	      << object.size() << ":" << quantizer.objectList.genuineDimension;
+	  NGTThrowException(msg);
 	}
-#endif
 	index.insert(idx, object);
 	if (count % 1000000 == 0) {
 	  std::cerr << "appended " << static_cast<float>(count) / 1000000.0 << "M objects.";
@@ -699,7 +709,7 @@ namespace QBG {
 	quantizer.objectList.get(id, object, &gcodebook.getObjectSpace());
 	objects.push_back(pair<std::vector<float>, size_t>(object, id));
       }
-      vector<NGT::Index::InsertionResult> gids;
+      vector<NGT::Index::InsertionResult> gids;	
       NGTQ::Quantizer::searchIndex(gcodebook, objects, gids);
 
       for (size_t bidx = 0; bidx < gids.size(); bidx++) {
@@ -716,25 +726,47 @@ namespace QBG {
 	  msg << "remove: Not found the specified ID. " << ids[bidx];
 	  NGTThrowException(msg);
 	}
-	NGTQ::QuantizedObjectProcessingStream quantizedStream(quantizedBlobGraph.numOfSubspaces, 
-							      rearrangedObjects.ids.size());
-	quantizedStream.uncompressFromUint4(static_cast<uint8_t*>(rearrangedObjects.objects));
 	NGTQ::InvertedIndexEntry<uint16_t> invertedIndexObjects;
-	invertedIndexObjects.initialize(quantizedBlobGraph.numOfSubspaces);
-	quantizedStream.restoreToInvertedIndex(invertedIndexObjects);
+	quantizer.getQuantizedObjectDistance().restoreIntoInvertedIndex(invertedIndexObjects, quantizedBlobGraph.numOfSubspaces, rearrangedObjects.ids, rearrangedObjects.objects);
+
+	///-/ ///////////////////////////////////////
 	invertedIndexObjects.erase(invertedIndexObjects.begin() + rmidx);
-	NGTQ::QuantizedObjectProcessingStream removedQuantizedStream(quantizedBlobGraph.numOfSubspaces, 
-								     rearrangedObjects.ids.size());
-	removedQuantizedStream.arrange(invertedIndexObjects);
+        ///-/ ///////////////////////////////////////
+
 	auto ids = rearrangedObjects.ids;
 	ids.erase(ids.begin() + rmidx);
 	rearrangedObjects.ids.clear();
 	rearrangedObjects.clear();
-	rearrangedObjects.objects = removedQuantizedStream.compressIntoUint4();
+	rearrangedObjects.objects = quantizer.getQuantizedObjectDistance().generateRearrangedObjects(invertedIndexObjects);
 	rearrangedObjects.ids = std::move(ids);
       }
     }
 
+    // Appends the given (vector, object ID) pairs to blob blobID: the blob's entries are restored, the new objects are encoded and added, and the blob's objects are regenerated.
+    void insertObjectsToBlob(NGT::ObjectID blobID, std::vector<std::pair<std::vector<float>, size_t>> &objects) {
+      auto &quantizer = getQuantizer();
+      auto &rearrangedObjects = quantizedBlobGraph[blobID];
+      auto subspaceID = quantizedBlobGraph[blobID].subspaceID;
+      NGTQ::InvertedIndexEntry<uint16_t> invertedIndexObjects;
+      // restoreIntoInvertedIndex() receives the blob's current ids and objects and fills invertedIndexObjects.
+      quantizer.getQuantizedObjectDistance().restoreIntoInvertedIndex(invertedIndexObjects, quantizedBlobGraph.numOfSubspaces, rearrangedObjects.ids, rearrangedObjects.objects);
+      auto idsback = rearrangedObjects.ids;
+      for (auto &b : objects) {
+	auto &object = b.first;
+	auto id = b.second;
+	NGTQ::Object tobject(object, id, subspaceID);
+	NGTQ::QuantizedObject quantizedObject;
+	quantizer.encode(subspaceID, tobject, quantizedObject);
+	invertedIndexObjects.pushBack(id, quantizedObject);
+	idsback.push_back(id);
+      }
+      // idsback is a copy of the old ids plus the new ones; it is moved back in after the blob has been cleared and regenerated.
+      rearrangedObjects.ids.clear();	  
+      rearrangedObjects.clear();
+      rearrangedObjects.objects = quantizer.getQuantizedObjectDistance().generateRearrangedObjects(invertedIndexObjects);
+      rearrangedObjects.ids = std::move(idsback);
+    }
+
     template<typename T>
     NGT::ObjectID insert(std::vector<T> &object) {
       std::vector<std::vector<T>> objects;
@@ -780,6 +812,10 @@ namespace QBG {
 	  rmids.push_back(id);
 	}
 	ids.push_back(id);
+	if ((quantizer.property.distanceType == NGTQ::DistanceType::DistanceTypeInnerProduct) &&
+	    (obj.size() + 1 == quantizer.objectList.genuineDimension)) {
+	  obj.emplace_back(0);
+	}
 	if (obj.size() != quantizer.property.genuineDimension) {
 	  ids.clear();
 	  std::stringstream msg;
@@ -801,7 +837,7 @@ namespace QBG {
 	}
       }
       auto &gcodebook = static_cast<NGT::GraphAndTreeIndex &>(quantizer.globalCodebookIndex.getIndex());
-      vector<NGT::Index::InsertionResult> gids;
+      vector<NGT::Index::InsertionResult> gids;	
       NGTQ::Quantizer::searchIndex(gcodebook, floatObjects, gids);
 
       if (gids.size() != floatObjects.size()) {
@@ -828,31 +864,7 @@ namespace QBG {
       for (size_t idx = 0; idx < vbatchObjects.size(); idx++) {
 	auto &it = vbatchObjects[idx];
 	auto blobID = (*it).first;
-	auto &rearrangedObjects = quantizedBlobGraph[blobID];
-	NGTQ::QuantizedObjectProcessingStream quantizedStream(quantizedBlobGraph.numOfSubspaces, 
-							      rearrangedObjects.ids.size());
-	quantizedStream.uncompressFromUint4(static_cast<uint8_t*>(rearrangedObjects.objects));
-	NGTQ::InvertedIndexEntry<uint16_t> invertedIndexObjects;
-	invertedIndexObjects.initialize(quantizedBlobGraph.numOfSubspaces);
-	quantizedStream.restoreToInvertedIndex(invertedIndexObjects);
-	auto subspaceID = quantizedBlobGraph[blobID].subspaceID;
-	auto idsback = rearrangedObjects.ids;
-	for (auto &b : (*it).second) {
-	  auto &object = b.first;
-	  auto id = b.second;
-	  NGTQ::Object tobject(object, id, subspaceID);
-	  NGTQ::QuantizedObject quantizedObject;
-	  quantizer.encode(subspaceID, tobject, quantizedObject);
-	  invertedIndexObjects.pushBack(id, quantizedObject);
-	  idsback.push_back(id);
-	}
-	NGTQ::QuantizedObjectProcessingStream updatedQuantizedStream(quantizedBlobGraph.numOfSubspaces, 
-								     invertedIndexObjects.size());
-	updatedQuantizedStream.arrange(invertedIndexObjects);
-	rearrangedObjects.ids.clear();
-	rearrangedObjects.clear();
-	rearrangedObjects.objects = updatedQuantizedStream.compressIntoUint4();
-	rearrangedObjects.ids = std::move(idsback);
+	insertObjectsToBlob(blobID, (*it).second);
       }
       return;
     }
@@ -949,6 +961,146 @@ namespace QBG {
       redirector.end();
     }
 
+    // Expands every blob of the QBG index at qbgIndexPath with the objects found around its centroid
+    // and saves the index. clusterCentroidsPath lists one centroid per line (line N <-> blob ID N, IDs
+    // start at 1); if empty, getStoredBlobFile() supplies it. rate >= 0 searches blob size * (1 + rate)
+    // objects, rate < 0 qbgSearchContainer.size. Raises via NGTThrowException on unusable input.
+    static void expandBlob(std::string qbgIndexPath, std::string clusterCentroidsPath,
+                           NGT::SearchContainer &ngtSearchContainer,
+                           QBG::SearchContainer &qbgSearchContainer,
+                           float rate, NGTQ::DataType refinementDataType, bool verbose = false) {
+      // Searches one batch in parallel; the range check is in the caller so no NGTThrowException sits inside OpenMP.
+      auto extractNeighbors = [](std::vector<std::vector<float>> &objects,
+                                 std::vector<uint32_t> &sizes,
+                                 QBG::Index &qbg, size_t &gidx,
+                                 NGT::SearchContainer &searchContainer,
+                                 QBG::SearchContainer qbgSearchContainer,
+                                 std::vector<std::vector<NGT::ObjectID>> &nearestNeighbors) {
+        NGT::Index &gcodebook = qbg.getQuantizer().globalCodebookIndex;
+#pragma omp parallel for
+        for (size_t oidx = 0; oidx < objects.size(); oidx++) {
+          auto gtarget = gidx + oidx;
+          {
+            // Diagnostic only: the centroid is expected to come back as its own first codebook result.
+            NGT::SearchQuery sq(objects[oidx]);
+            static_cast<NGT::SearchContainer&>(sq) = searchContainer;
+            NGT::ObjectDistances neighbors;
+            sq.setResults(&neighbors);
+            gcodebook.search(sq);
+            if (neighbors.empty()) {  // neighbors[0] must not be read when the search found nothing
+              std::cerr << "extpandClusters: Strong warning! " << gtarget << std::endl;
+            } else if (gtarget + 1 != neighbors[0].id) {
+              std::cerr << "extpandClusters: Warning! " << gtarget << ":" << neighbors[0].id << std::endl;
+              auto found = false;
+              for (size_t i = 1; i < neighbors.size(); i++) {
+                std::cerr << neighbors[i].id << ":" << neighbors[i].distance << std::endl;
+                if (gtarget + 1 == neighbors[i].id) {
+                  found = true;
+                  std::cerr << "Found" << std::endl;
+                  break;
+                }
+              }
+              if (!found) {
+                std::cerr << "extpandClusters: Strong warning! " << gtarget << std::endl;
+              }
+            }
+          }
+          {
+            NGT::ObjectDistances neighbors;
+            QBG::SearchContainer sc(qbgSearchContainer);
+            sc.setObjectVector(objects[oidx]);
+            sc.setSize(sizes[oidx]);
+            sc.setResults(&neighbors);
+            qbg.searchInTwoSteps(sc);
+            for (auto &n : neighbors) {
+              nearestNeighbors[gtarget].emplace_back(n.id);
+            }
+          }
+        }
+        gidx += objects.size();
+        objects.clear();
+        sizes.clear();
+      };
+      NGT::StdOstreamRedirector redirector(!verbose);
+      redirector.begin();
+
+      auto prebuilt = false;
+      QBG::Index qbg(qbgIndexPath, prebuilt, verbose, refinementDataType);
+      if (clusterCentroidsPath.empty()) {
+        clusterCentroidsPath = QBG::Index::getStoredBlobFile(qbgIndexPath);
+      }
+      std::ifstream stream(clusterCentroidsPath);
+      if (!stream) {
+        std::stringstream msg;
+        msg << "Cannot open the centroid list file. " << clusterCentroidsPath;
+        NGTThrowException(msg);
+      }
+      auto &quantizer = qbg.getQuantizer();
+      auto &gcodebook = quantizer.globalCodebookIndex;
+      std::string line;
+      if (gcodebook.getObjectRepositorySize() == 0) {
+        NGTThrowException("Global codebook index is empty.");
+      }
+      if (verbose) {
+        std::cerr << "qbg search container size=" << qbgSearchContainer.size << std::endl;
+        std::cerr << "repo size=" << gcodebook.getObjectRepositorySize() << std::endl;
+      }
+
+      std::vector<std::vector<NGT::ObjectID>> nearestNeighbors(gcodebook.getObjectRepositorySize() - 1);
+      std::vector<std::vector<float>> objects;
+      std::vector<uint32_t> sizes;
+      size_t gidx = 0;
+      while (getline(stream, line)) {
+        std::vector<std::string> tokens;
+        NGT::Common::tokenize(line, tokens, " \t");
+        std::vector<float> object;
+        for (auto &token : tokens) {
+          object.emplace_back(NGT::Common::strtof(token));
+        }
+        objects.emplace_back(object);
+        // gidx only advances once per batch, so this line's blob is gidx + its 1-based position in the batch.
+        const size_t blobID = gidx + objects.size();
+        if (blobID > nearestNeighbors.size()) {
+          std::stringstream msg;
+          msg << "Cluster centroids file has more entries than global codebook. "
+              << blobID - 1 << ":" << gcodebook.getObjectRepositorySize();
+          NGTThrowException(msg);
+        }
+        if (rate < 0.0) {
+          sizes.emplace_back(qbgSearchContainer.size);
+        } else {
+          sizes.emplace_back(qbg.quantizedBlobGraph[blobID].ids.size() * (1.0 + rate));
+        }
+        if (objects.size() == 10) {
+          extractNeighbors(objects, sizes, qbg, gidx, ngtSearchContainer, qbgSearchContainer, nearestNeighbors);
+        }
+      }
+      if (objects.size() > 0) {
+        extractNeighbors(objects, sizes, qbg, gidx, ngtSearchContainer, qbgSearchContainer, nearestNeighbors);
+      }
+      size_t nOfAddedObjects = 0;
+      for (size_t bidx = 0; bidx < nearestNeighbors.size(); bidx++) {
+        NGT::ObjectID blobID = bidx + 1;
+        auto &ids = qbg.quantizedBlobGraph[blobID].ids;
+        std::unordered_set<NGT::ObjectID> blob(ids.begin(), ids.end());
+        std::vector<std::pair<std::vector<float>, size_t>> newObjects;
+        for (auto &id : nearestNeighbors[bidx]) {
+          if (blob.find(id) == blob.end()) {
+            std::vector<float> object;
+            qbg.getQuantizer().objectList.get(id, object);
+            newObjects.emplace_back(std::make_pair(object, id));
+          }
+        }
+        nOfAddedObjects += newObjects.size();
+        qbg.insertObjectsToBlob(blobID, newObjects);
+      }
+      if (verbose) {
+        std::cerr << "# of added objects=" << nOfAddedObjects << " the mean # of added objects=" << (nearestNeighbors.empty() ? 0 : nOfAddedObjects / nearestNeighbors.size()) << std::endl;
+      }
+      qbg.save();
+      redirector.end();
+    }
+
     void getSeeds(NGT::Index &index, NGT::Object *object, NGT::ObjectDistances &seeds, size_t noOfSeeds) {
       auto &graph = static_cast<NGT::GraphAndTreeIndex&>(index.getIndex());
       NGT::SearchContainer sc(*object);
@@ -976,16 +1128,27 @@ namespace QBG {
       judge(NGTQG::QuantizedNode &ivi, size_t k, NGT::Distance radius,
 	    NGTQ::QuantizedObjectDistance::DistanceLookupTableUint8 &lut,
 	    NGT::NeighborhoodGraph::ResultSet &result, size_t &foundCount
+	    , void *query = 0, std::unique_ptr<NGTQ::BooleanSet> *checkedIDs = 0
 	    ) {
       auto noOfObjects = ivi.ids.size();
-      float distances[NGTQ::QuantizedObjectProcessingStream::getNumOfAlignedObjects(noOfObjects)];
       auto &quantizedObjectDistance = getQuantizer().getQuantizedObjectDistance();
+      std::vector<float> distances(quantizedObjectDistance.getNumOfAlignedObjects(noOfObjects));
+      if (checkedIDs != 0) {
+	for (size_t idx = 0; idx < ivi.ids.size(); idx++) {
+	  auto id = ivi.ids[idx];
+	  if ((**checkedIDs)[id]) {
+	    distances[idx] = 1.0;
+	  } else {
+	    //std::cerr << "non checked" << std::endl;
+	    (**checkedIDs).set(id);
+	  }
+	}
+      }
 #ifdef NGTQBG_MIN
-      float distance = quantizedObjectDistance(ivi.objects, &distances[0], noOfObjects, lut);
+      float distance = quantizedObjectDistance(ivi.objects, &distances[0], noOfObjects, lut, query);
 #else
-      quantizedObjectDistance(ivi.objects, &distances[0], noOfObjects, lut);
+      quantizedObjectDistance(ivi.objects, &distances[0], noOfObjects, lut, query);
 #endif
-
 #ifdef NGTQBG_MIN
       if (distance >= radius) {
 	return std::make_pair(distance, radius);
@@ -1013,20 +1176,32 @@ namespace QBG {
     }
 
 
-    static void refineDistances(QBG::SearchContainer &searchContainer, NGTQ::Quantizer &quantizer,
+    static float refineDistances(NGTQ::Quantizer &quantizer,
 				NGT::NeighborhoodGraph::ResultSet &result,
-				NGT::ObjectDistances &qresults) {
-      auto &objectSpace = quantizer.globalCodebookIndex.getObjectSpace();
+				NGT::ObjectDistances &qresults,
+				size_t exactResultSize,
+				std::unique_ptr<std::vector<float>> &resizedQuery) {
+      float err;
+      NGT::ObjectSpace *objectSpace;
+      if (quantizer.refinementObjectSpace != 0) {
+	objectSpace = quantizer.refinementObjectSpace;
+      } else if (quantizer.refinementObjectSpaceForObjectList != 0) {
+	objectSpace = quantizer.refinementObjectSpaceForObjectList;
+      } else {
+	std::stringstream msg;
+	msg << "Fatal inner error! Any refinement object space is unavailable.";
+	NGTThrowException(msg);
+      }
       NGT::ResultPriorityQueue qres;
-      if (objectSpace.getObjectType() == typeid(float)) {
-	refineDistances<float>(searchContainer, quantizer, result, qres);
-      } else if (objectSpace.getObjectType() == typeid(uint8_t)) {
-	refineDistances<uint8_t>(searchContainer, quantizer, result, qres);
-      } else if (objectSpace.getObjectType() == typeid(NGT::float16)) {
-	refineDistances<NGT::float16>(searchContainer, quantizer, result, qres);
+      if (objectSpace->getObjectType() == typeid(float)) {
+	err = refineDistances<float>(quantizer, result, qres, exactResultSize, resizedQuery);
+      } else if (objectSpace->getObjectType() == typeid(uint8_t)) {
+	err = refineDistances<uint8_t>(quantizer, result, qres, exactResultSize, resizedQuery);
+      } else if (objectSpace->getObjectType() == typeid(NGT::float16)) {
+	err = refineDistances<NGT::float16>(quantizer, result, qres, exactResultSize, resizedQuery);
       } else {
 	std::stringstream msg;
-	msg << "refineDistances: Fatal error! Invalid datatype. " << objectSpace.getObjectType().name() << std::endl;
+	msg << "refineDistances: Fatal error! Invalid datatype. " << objectSpace->getObjectType().name() << std::endl;
 	NGTThrowException(msg);
       }
       qresults.resize(qres.size());
@@ -1034,18 +1209,21 @@ namespace QBG {
 	qresults[i] = qres.top();
 	qres.pop();
       }
+      return err;
     }
 
-    static void refineDistances(QBG::SearchContainer &searchContainer, NGTQ::Quantizer &quantizer,
+    static float refineDistances(NGTQ::Quantizer &quantizer,
 				NGT::NeighborhoodGraph::ResultSet &result,
-				NGT::ResultPriorityQueue &qresults) {
-      auto &objectSpace = quantizer.globalCodebookIndex.getObjectSpace();
+				NGT::ResultPriorityQueue &qresults,
+				size_t exactResultSize,
+				std::unique_ptr<std::vector<float>> &resizedQuery) {
+      auto &objectSpace = *quantizer.refinementObjectSpace;
       if (objectSpace.getObjectType() == typeid(float)) {
-	refineDistances<float>(searchContainer, quantizer, result, qresults);
+	return refineDistances<float>(quantizer, result, qresults, exactResultSize, resizedQuery);
       } else if (objectSpace.getObjectType() == typeid(uint8_t)) {
-	refineDistances<uint8_t>(searchContainer, quantizer, result, qresults);
+	return refineDistances<uint8_t>(quantizer, result, qresults, exactResultSize, resizedQuery);
       } else if (objectSpace.getObjectType() == typeid(NGT::float16)) {
-	refineDistances<NGT::float16>(searchContainer, quantizer, result, qresults);
+	return refineDistances<NGT::float16>(quantizer, result, qresults, exactResultSize, resizedQuery);
       } else {
 	std::stringstream msg;
 	msg << "refineDistances: Fatal error! Invalid datatype. " << objectSpace.getObjectType().name() << std::endl;
@@ -1054,30 +1232,34 @@ namespace QBG {
     }
 
     template<typename T>
-    static void refineDistances(QBG::SearchContainer &searchContainer, NGTQ::Quantizer &quantizer,
+    static float refineDistances(NGTQ::Quantizer &quantizer,
 				NGT::NeighborhoodGraph::ResultSet &result,
-				NGT::ResultPriorityQueue &qresults) {
+				NGT::ResultPriorityQueue &qresults,
+				size_t exactResultSize,
+				std::unique_ptr<std::vector<float>> &resizedQuery) {
       qresults = NGT::ResultPriorityQueue();
-      NGT::Object &query = searchContainer.object;
-      auto &objectSpace = quantizer.globalCodebookIndex.getObjectSpace();
-      auto paddedDimension = objectSpace.getPaddedDimension();
-      const size_t prefetchSize = objectSpace.getPrefetchSize();
 #ifdef NGTQ_OBJECT_IN_MEMORY
-      if (quantizer.objectListOnMemory.size() != 0) {
+      if (quantizer.refinementObjectSpace != 0) {
+	auto &os = *quantizer.refinementObjectSpace;
+	auto &repo = os.getRepository();
+	auto &comparator = os.getComparator();
+	auto *q = os.allocateNormalizedObject(*resizedQuery);
 	while (!result.empty()) {
 	  auto r = result.top();
 	  result.pop();
-	  NGT::Object &object = *quantizer.objectListOnMemory.get(r.id);
-	  if (!result.empty()) {
-            uint8_t *ptr = static_cast<uint8_t*>(quantizer.objectListOnMemory.get(result.top().id)->getPointer());
-	    NGT::MemoryCache::prefetch(ptr, prefetchSize);
+	  {
+	    r.distance = comparator(*q, *repo.get(r.id));
+	    //r.distance = comparator(*query, *repo.get(r.id));
+	    qresults.push(r);
 	  }
-	  r.distance = objectSpace.getComparator()(query, object);
-	  qresults.push(r);
 	}
-      } else {
+	os.deleteObject(q);
+      } else if (quantizer.refinementObjectSpaceForObjectList != 0) {
 #endif
 	auto threadid = omp_get_thread_num();
+	auto &os = *quantizer.refinementObjectSpaceForObjectList;
+	auto &comparator = os.getComparator();
+	auto *q = os.allocateNormalizedObject(*resizedQuery);
 	while (!result.empty()) {
 	  auto r = result.top();
 	  result.pop();
@@ -1087,71 +1269,559 @@ namespace QBG {
 #else
 	  quantizer.objectList.get(r.id, object);
 #endif
-	  r.distance = NGT::PrimitiveComparator::compareL2(static_cast<T*>(query.getPointer()),
-							   static_cast<T*>(object.data()), paddedDimension);
-
-
+	  auto *o = os.allocateNormalizedObject(object);
+	  r.distance = comparator(*q, *o);
+	  os.deleteObject(o);
 	  qresults.push(r);
 	}
+	os.deleteObject(q);
 #ifdef NGTQ_OBJECT_IN_MEMORY
       }
 #endif
-      while (qresults.size() > searchContainer.exactResultSize) {
+      while (qresults.size() > exactResultSize) {
 	qresults.pop();
       }
+      return 0.0;
+    }
+
+    void searchInTwoSteps(QBG::BatchSearchContainer &searchContainer) {
+      if (searchContainer.numOfQueries == 0) {
+	NGTThrowException("search: object is null.");
+      }
+      auto parameterSize = searchContainer.size;
+      auto parameterExactResultSize = searchContainer.size;
+      if (searchContainer.refinementExpansion >= 1.0) {
+        parameterSize *= searchContainer.refinementExpansion;
+      } else {
+	parameterExactResultSize = 0;
+      }
+      NGT::Timer timer;
+      timer.start();
+      auto &quantizer = getQuantizer();
+      auto &globalIndex = quantizer.globalCodebookIndex;
+      std::vector<NGT::ObjectDistances> nearestBlobs(searchContainer.numOfQueries);
+
+
+#pragma omp parallel for
+      for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) {
+#if 1
+	std::vector<float> qobj(searchContainer.dimension);
+	memcpy(qobj.data(), searchContainer.getQuery(qi), searchContainer.dimension * sizeof(float));
+	float mag = 0;
+	for (size_t i = 0; i < quantizer.property.genuineDimension; i++) {
+	  mag += qobj[i] * qobj[i];
+	}
+	if (quantizer.property.maxMagnitude - mag > 0.0) {
+	  qobj[quantizer.property.genuineDimension] = sqrt(quantizer.property.maxMagnitude - mag);
+	} else {
+	  qobj[quantizer.property.genuineDimension] = 0.0;
+	}
+	auto &globalGraph = static_cast<NGT::GraphAndTreeIndex&>(globalIndex.getIndex());
+	NGT::ObjectDistances seeds;
+	{
+	  NGT::Object *query = globalIndex.getObjectSpace().allocateNormalizedPersistentObject(qobj);
+	  NGT::SearchContainer sc(*query);
+	  sc.setSize(500);
+	  try {
+	    globalGraph.getSeedsFromTree(sc, seeds);
+	  } catch(NGT::Exception &err) {
+	    globalIndex.deleteObject(query);
+	    throw err;
+	  }
+	  globalIndex.deleteObject(query);
+	}
+	{
+	  qobj[quantizer.property.genuineDimension] = 0.0;
+	  NGT::Object *query = globalIndex.getObjectSpace().allocateNormalizedPersistentObject(qobj);
+	  NGT::SearchContainer sc(searchContainer, *query);
+	  sc.setResults(&nearestBlobs[qi]);
+	  sc.setEpsilon(searchContainer.blobExplorationCoefficient - 1.0);
+	  sc.setSize(searchContainer.numOfProbes);
+	  try {
+	    globalIndex.search(sc, seeds);
+	  } catch(NGT::Exception &err) {
+	    globalIndex.deleteObject(query);
+	    throw err;
+	  }
+	  globalIndex.deleteObject(query);
+	}
+#else
+	std::vector<float> qobj(searchContainer.dimension);
+	memcpy(qobj.data(), searchContainer.getQuery(qi), searchContainer.dimension * sizeof(float));
+	//NGT::Object *query = globalIndex.allocateObject(qobj);
+	NGT::Object *query = globalIndex.getObjectSpace().allocateNormalizedPersistentObject(qobj);
+	//NGT::Object *query = allocateObject(q);
+	NGT::SearchContainer sc(searchContainer, *query);
+	sc.setResults(&nearestBlobs[qi]);
+	sc.setEpsilon(searchContainer.blobExplorationCoefficient - 1.0);
+	sc.setSize(searchContainer.numOfProbes);
+	globalIndex.search(sc);
+	globalIndex.deleteObject(query);
+#endif
+      }
+      std::unordered_map<size_t, std::vector<uint32_t>> blobs;
+      for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) {
+	for (size_t i = 0; i < nearestBlobs[qi].size(); i++) {
+	  auto blobID = nearestBlobs[qi][i].id;
+	  if (blobs.find(blobID) == blobs.end()) {
+	    blobs.insert({blobID, std::vector<uint32_t>()});
+	  }
+	  blobs[blobID].emplace_back(qi);
+	}
+      }
+      //-/ Change the format (map -> vector) for parallelization.
+      std::vector<std::pair<size_t, std::vector<uint32_t>>> blobList;
+      blobList.reserve(blobs.size());
+      for (auto &v : blobs) {
+	blobList.emplace_back(v);
+      }
+      auto &quantizedObjectDistance = getQuantizer().getQuantizedObjectDistance();
+      auto dimension = searchContainer.dimension;
+      auto *fqueries = new float[searchContainer.numOfQueries * dimension];
+      auto *cqueries = new uint8_t[searchContainer.numOfQueries * dimension];
+      void *transformedQueries = fqueries;
+      float offset = 0.0;
+      float scale = -1.0;
+      NGT::ObjectSpace::ObjectType objectType = NGT::ObjectSpace::ObjectTypeNone;
+      switch(quantizer.property.localClusterDataType) {
+	case NGTQ::ClusterDataTypeSQSU8:
+	  objectType = NGT::ObjectSpace::ObjectType::Qsuint8; break;
+        default: break;
+      }
+      if (objectType != NGT::ObjectSpace::ObjectTypeNone) {
+	offset = getQuantizer().property.scalarQuantizationOffset;
+	scale = getQuantizer().property.scalarQuantizationScale;
+	transformedQueries = cqueries;
+      }
+#pragma omp parallel for
+      for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) {
+	auto *fq = fqueries + dimension * qi;
+	memcpy(fq, searchContainer.getQuery(qi), dimension * sizeof(float));
+#if defined(NGTQG_ROTATION)
+	if (quantizedObjectDistance.rotation != 0) {
+	  quantizedObjectDistance.rotation->mul(fq);
+	}
+#endif
+	if (objectType != NGT::ObjectSpace::ObjectTypeNone) {
+          NGT::ObjectSpace::quantizeToQint8(fq, dimension, cqueries + dimension * qi, objectType, offset, scale);
+	}
+      }
+      std::vector<std::vector<float>> distances(blobList.size());
+#pragma omp parallel for
+      for (size_t bi = 0; bi < blobList.size(); bi++) {
+	auto blobID = blobList[bi].first;
+	auto noOfObjects = quantizedBlobGraph[blobID].ids.size();
+	auto *lut = reinterpret_cast<NGTQ::QuantizedObjectDistance::DistanceLookupTableUint8 *>(0);
+	std::vector<uint32_t> &queryList = blobList[bi].second;
+	distances[bi].resize(noOfObjects * queryList.size());
+	quantizedObjectDistance(quantizedBlobGraph[blobID].objects, distances[bi].data(), noOfObjects, *lut, 
+	                        transformedQueries, queryList);
+      }
+      delete[] fqueries;
+      delete[] cqueries;
+#define LOGIC6
+#if defined(LOGIC1)
+      searchContainer.batchResult.clear();
+      searchContainer.batchResult.resize(searchContainer.numOfQueries);
+      for (size_t bi = 0; bi < blobList.size(); bi++) {
+	auto blobID = blobList[bi].first;
+	//auto subspaceID = quantizedBlobGraph[blobID].subspaceID;
+	std::vector<uint32_t> &queryList = blobList[bi].second;
+	auto noOfObjects = quantizedBlobGraph[blobID].ids.size();
+	for (size_t qi = 0; qi < queryList.size(); qi++) {
+	  //std::cerr << "q=" << queryList[qi] << ":" << std::endl;
+	  for (size_t di = 0; di < noOfObjects; di++) {
+	    //std::cerr << "res=" << quantizedBlobGraph[blobID].ids[di] << " d=" << distances[bi][qi * noOfObjects + di] << std::endl;
+	    searchContainer.batchResult[queryList[qi]].emplace_back(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di],
+								    distances[bi][qi * noOfObjects + di] ));
+	  }
+	}
+      }
+#pragma omp parallel for
+      for (size_t i = 0; i < searchContainer.batchResult.size(); i++) {
+	std::sort(searchContainer.batchResult[i].begin(),
+		  searchContainer.batchResult[i].end());
+	searchContainer.batchResult[i].resize(parameterSize);
+      }
+#elif defined(LOGIC2)
+      std::vector<std::priority_queue<NGT::ObjectDistance, std::vector<NGT::ObjectDistance>, 
+	std::less<NGT::ObjectDistance>>> resultSet(searchContainer.numOfQueries);
+      for (size_t bi = 0; bi < blobList.size(); bi++) {
+	auto blobID = blobList[bi].first;
+	//auto subspaceID = quantizedBlobGraph[blobID].subspaceID;
+	std::vector<uint32_t> &queryList = blobList[bi].second;
+	auto noOfObjects = quantizedBlobGraph[blobID].ids.size();
+	for (size_t qi = 0; qi < queryList.size(); qi++) {
+	  //std::cerr << "q=" << queryList[qi] << ":" << std::endl;
+	  for (size_t di = 0; di < noOfObjects; di++) {
+	    resultSet[queryList[qi]].push(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di],
+							      distances[bi][qi * noOfObjects + di]));
+	    //searchContainer.batchResult[queryList[qi]].emplace_back(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di],
+	    //							    distances[bi][qi * noOfObjects + di] ));
+	    if (resultSet[queryList[qi]].size() > parameterSize) {
+	      resultSet[queryList[qi]].pop();
+	    }
+	  }
+	}
+      }
+      searchContainer.batchResult.clear();
+      searchContainer.batchResult.resize(searchContainer.numOfQueries);
+#pragma omp parallel for
+      for (size_t qi = 0; qi < resultSet.size(); qi++) {
+        searchContainer.batchResult[qi].resize(resultSet[qi].size());
+	while (!resultSet[qi].empty()) {
+          searchContainer.batchResult[qi][resultSet[qi].size() - 1] = std::move(resultSet[qi].top());
+	  resultSet[qi].pop();
+	}
+      }
+#elif defined(LOGIC3)
+      auto nOfThreads = omp_get_max_threads();
+      std::vector<std::priority_queue<NGT::ObjectDistance, std::vector<NGT::ObjectDistance>, 
+	std::less<NGT::ObjectDistance>>> resultSet(searchContainer.numOfQueries * nOfThreads);
+#pragma omp parallel for
+      for (size_t bi = 0; bi < blobList.size(); bi++) {
+        auto thdID = omp_get_thread_num();
+	auto thdIdx = thdID * searchContainer.numOfQueries;
+	auto blobID = blobList[bi].first;
+	std::vector<uint32_t> &queryList = blobList[bi].second;
+	auto noOfObjects = quantizedBlobGraph[blobID].ids.size();
+	for (size_t qi = 0; qi < queryList.size(); qi++) {
+	  for (size_t di = 0; di < noOfObjects; di++) {
+	    auto &rset = resultSet[thdIdx + queryList[qi]];
+	    auto d = distances[bi][qi * noOfObjects + di];
+	    if (rset.size() < parameterSize) {
+	      rset.push(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di], d));
+	    } else if (rset.top().distance >= d) {
+	      rset.push(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di], d));
+	      rset.pop();
+	    }
+	  }
+	}
+      }
+#pragma omp parallel for
+      for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) {
+	auto &rset = resultSet[qi];
+        for (size_t ti = 1; ti < nOfThreads; ti++) {
+	  auto thdIdx = ti * searchContainer.numOfQueries;
+          while (!resultSet[thdIdx + qi].empty()) {
+            if (rset.size() < parameterSize) {
+	      rset.push(resultSet[thdIdx + qi].top());
+	    } else if (rset.top().distance >= resultSet[thdIdx + qi].top().distance) {
+	      rset.push(resultSet[thdIdx + qi].top());
+	      rset.pop();
+	    }
+	    resultSet[thdIdx + qi].pop();
+          }
+        }
+      }
+      searchContainer.batchResult.clear();
+      searchContainer.batchResult.resize(searchContainer.numOfQueries);
+#pragma omp parallel for
+      for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) {
+        searchContainer.batchResult[qi].resize(resultSet[qi].size());
+	while (!resultSet[qi].empty()) {
+          searchContainer.batchResult[qi][resultSet[qi].size() - 1] = std::move(resultSet[qi].top());
+	  resultSet[qi].pop();
+	}
+      }
+#elif defined(LOGIC4)
+      auto nOfThreads = omp_get_max_threads();
+      std::vector<NGT::ObjectDistances> resultSet(searchContainer.numOfQueries * nOfThreads);
+#pragma omp parallel for
+      for (size_t bi = 0; bi < blobList.size(); bi++) {
+        auto thdID = omp_get_thread_num();
+	auto thdIdx = thdID * searchContainer.numOfQueries;
+	auto blobID = blobList[bi].first;
+	//auto subspaceID = quantizedBlobGraph[blobID].subspaceID;
+	std::vector<uint32_t> &queryList = blobList[bi].second;
+	auto noOfObjects = quantizedBlobGraph[blobID].ids.size();
+	for (size_t qi = 0; qi < queryList.size(); qi++) {
+	  auto &rset = resultSet[thdIdx + queryList[qi]];
+	  //std::cerr << "q=" << queryList[qi] << ":" << std::endl;
+	  for (size_t di = 0; di < noOfObjects; di++) {
+	    auto d = distances[bi][qi * noOfObjects + di];
+	    //std::cerr << quantizedBlobGraph[blobID].ids[di] << ":" << d << std::endl;
+	    rset.emplace_back(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di], d));
+	  }
+	}
+      }
+      searchContainer.batchResult.clear();
+      searchContainer.batchResult.resize(searchContainer.numOfQueries);
 
+#pragma omp parallel for
+      for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) {
+        for (size_t ti = 0; ti < nOfThreads; ti++) {
+	  auto thdIdx = ti * searchContainer.numOfQueries;
+	  std::sort(resultSet[thdIdx + qi].begin(), resultSet[thdIdx + qi].end());
+	  if (resultSet[thdIdx + qi].size() > parameterSize) {
+	    resultSet[thdIdx + qi].resize(parameterSize);
+	  }
+	  for (auto &obj : resultSet[thdIdx + qi]) {
+	    searchContainer.batchResult[qi].emplace_back(obj);
+          }
+        }
+	std::sort(searchContainer.batchResult[qi].begin(), 
+		  searchContainer.batchResult[qi].end());
+	searchContainer.batchResult[qi].resize(parameterSize);
+      }
+#elif defined(LOGIC5)
+      auto nOfThreads = omp_get_max_threads();
+      std::vector<NGT::ObjectDistances> resultSet(searchContainer.numOfQueries * nOfThreads);
+      std::vector<std::pair<float, int32_t>> max(searchContainer.numOfQueries * nOfThreads,
+						 std::pair<float, uint32_t>(FLT_MAX, 0));
+#pragma omp parallel for
+      for (size_t bi = 0; bi < blobList.size(); bi++) {
+        auto thdID = omp_get_thread_num();
+	auto thdIdx = thdID * searchContainer.numOfQueries;
+	auto blobID = blobList[bi].first;
+	std::vector<uint32_t> &queryList = blobList[bi].second;
+	auto noOfObjects = quantizedBlobGraph[blobID].ids.size();
+	for (size_t qi = 0; qi < queryList.size(); qi++) {
+	  auto &rset = resultSet[thdIdx + queryList[qi]];
+	  auto &mx = max[thdIdx + queryList[qi]];
+	  //std::cerr << "q=" << queryList[qi] << ":" << std::endl;
+	  for (size_t di = 0; di < noOfObjects; di++) {
+	    auto d = distances[bi][qi * noOfObjects + di];
+	    //std::cerr << quantizedBlobGraph[blobID].ids[di] << ":" << d << std::endl;
+	    if (d < mx.first) {
+	      if (rset.size() >= parameterSize) {
+		rset[mx.second] = std::move(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di], d));
+		mx.first = -1.0;
+		mx.second = -1;
+		for (auto it = rset.begin(); it != rset.end(); ++it) {
+		  if ((*it).distance > mx.first) {
+		    mx.first = (*it).distance;
+		    mx.second = std::distance(rset.begin(), it);
+		  }
+		}
+	      } else {
+		rset.emplace_back(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di], d));
+		if (rset.size() >= parameterSize) {
+		  mx.first = -1.0;
+		  mx.second = -1;
+		  for (auto it = rset.begin(); it != rset.end(); ++it) {
+		    if ((*it).distance > mx.first) {
+		      mx.first = (*it).distance;
+		      mx.second = std::distance(rset.begin(), it);
+		    }
+		  }
+		}
+	      }
+	    }
+	  }
+	}
+      }
+
+      searchContainer.batchResult.clear();
+      searchContainer.batchResult.resize(searchContainer.numOfQueries);
+#pragma omp parallel for
+      for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) {
+        for (size_t ti = 0; ti < nOfThreads; ti++) {
+	  auto thdIdx = ti * searchContainer.numOfQueries;
+	  for (auto &obj : resultSet[thdIdx + qi]) {
+	    searchContainer.batchResult[qi].emplace_back(obj);
+          }
+        }
+	std::sort(searchContainer.batchResult[qi].begin(), 
+		  searchContainer.batchResult[qi].end());
+	if (searchContainer.batchResult[qi].size() > parameterSize) {
+	  searchContainer.batchResult[qi].resize(parameterSize);
+	}
+      }
+#elif defined(LOGIC6)
+      auto nOfThreads = omp_get_max_threads();
+      std::vector<NGT::ObjectDistances> resultSet(searchContainer.numOfQueries * nOfThreads);
+      std::vector<float> max(searchContainer.numOfQueries * nOfThreads, FLT_MAX);
+#pragma omp parallel for
+      for (size_t bi = 0; bi < blobList.size(); bi++) {
+        auto thdID = omp_get_thread_num();
+	auto thdIdx = thdID * searchContainer.numOfQueries;
+	auto blobID = blobList[bi].first;
+	//auto subspaceID = quantizedBlobGraph[blobID].subspaceID;
+	std::vector<uint32_t> &queryList = blobList[bi].second;
+	auto noOfObjects = quantizedBlobGraph[blobID].ids.size();
+	for (size_t qi = 0; qi < queryList.size(); qi++) {
+	  auto &rset = resultSet[thdIdx + queryList[qi]];
+	  auto &mx = max[thdIdx + queryList[qi]];
+	  for (size_t di = 0; di < noOfObjects; di++) {
+	    auto d = distances[bi][qi * noOfObjects + di];
+	    if (d < mx) {
+	      rset.emplace_back(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di], d));
+	      if (rset.size() >= parameterSize * 2) {
+		std::sort(rset.begin(), rset.end());
+		for (auto it = rset.begin(); it + 1 != rset.end();) {
+		  if ((*it).id == (*(it + 1)).id) {
+		    it = rset.erase(it);
+		  } else {
+		    ++it;
+		  }
+		}
+		rset.resize(parameterSize);
+		mx = rset.back().distance;
+	      }
+	    }
+	  }
+	  if (rset.size() > 0) {
+	    std::sort(rset.begin(), rset.end());
+	    for (auto it = rset.begin(); it + 1 != rset.end();) {
+	      if ((*it).id == (*(it + 1)).id) {
+		it = rset.erase(it);
+	      } else {
+		++it;
+	      }
+	    }
+	    if (rset.size() > parameterSize) {
+	      rset.resize(searchContainer.size);
+	    }
+	  }
+	}
+      }
+
+      searchContainer.batchResult.clear();
+      searchContainer.batchResult.resize(searchContainer.numOfQueries);
+#pragma omp parallel for
+      for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) {
+        for (int ti = 0; ti < nOfThreads; ti++) {
+	  auto thdIdx = ti * searchContainer.numOfQueries;
+	  for (auto &obj : resultSet[thdIdx + qi]) {
+	    searchContainer.batchResult[qi].emplace_back(obj);
+          }
+        }
+	if (searchContainer.batchResult[qi].size() > 0) {
+	  std::sort(searchContainer.batchResult[qi].begin(), 
+		    searchContainer.batchResult[qi].end());
+	  for (auto it = searchContainer.batchResult[qi].begin();
+	       it + 1 != searchContainer.batchResult[qi].end();) {
+	    if ((*it).id == (*(it + 1)).id) {
+	      it = searchContainer.batchResult[qi].erase(it);
+	    } else {
+	      ++it;
+	    }
+	  }
+	  if (searchContainer.batchResult[qi].size() > parameterSize) {
+	    searchContainer.batchResult[qi].resize(parameterSize);
+	  }
+	}
+      }
+#endif
+
+      if (parameterExactResultSize > 0 && quantizer.refinementObjectSpace != 0) {
+	auto &os = *quantizer.refinementObjectSpace;
+	auto &repo = os.getRepository();
+	auto &comparator = os.getComparator();
+#pragma omp parallel for
+	for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) {
+	  auto *fq = static_cast<float*>(searchContainer.getQuery(qi));
+	  auto *q = os.allocateNormalizedObject(fq, dimension);
+	  for (auto &r : searchContainer.batchResult[qi]) {
+	    r.distance = comparator(*q, *repo.get(r.id));
+	  }
+	  os.deleteObject(q);
+	  std::sort(searchContainer.batchResult[qi].begin(), 
+		    searchContainer.batchResult[qi].end());
+	  if (parameterExactResultSize > 0) {
+	    if (searchContainer.batchResult[qi].size() > parameterExactResultSize ) {
+	      searchContainer.batchResult[qi].resize(parameterExactResultSize);
+	    }
+	  }
+	}
+      }
     }
 
     void searchInTwoSteps(QBG::SearchContainer &searchContainer) {
-      if (searchContainer.isEmptyObject()) {
-	NGT::Object query(searchContainer.objectVector, getQuantizer().globalCodebookIndex.getObjectSpace());
-	SearchContainer sc(searchContainer, query);
-	searchInTwoSteps(sc);
-	searchContainer.workingResult = std::move(sc.workingResult);
-	return;
+      auto parameterSize = searchContainer.size;
+      auto parameterExactResultSize = searchContainer.size;
+      if (searchContainer.refinementExpansion >= 1.0) {
+        parameterSize *= searchContainer.refinementExpansion;
+      } else {
+	parameterExactResultSize = 0;
       }
       NGT::ObjectDistances blobs;
-      NGT::SearchContainer sc(searchContainer);
-      sc.setResults(&blobs);
-      sc.setEpsilon(searchContainer.blobExplorationCoefficient - 1.0);
-      sc.setSize(searchContainer.numOfProbes);
-
       auto &quantizer = getQuantizer();
       auto &globalIndex = quantizer.globalCodebookIndex;
-
       auto &quantizedObjectDistance = quantizer.getQuantizedObjectDistance();
       if (searchContainer.objectVector.size() == 0) {
 	NGTThrowException("search: object is null.");
       }
+      auto dimension = getQuantizer().globalCodebookIndex.getObjectSpace().getDimension();
       std::vector<float> rotatedQuery = searchContainer.objectVector;
-      {
-	NGT::Object *query = allocateObject(searchContainer.objectVector);
-	NGT::SearchContainer tsc(sc, *query);
-	tsc.setResults(&sc.getResult());
-	globalIndex.search(tsc);
-	globalIndex.deleteObject(query);
+      if (rotatedQuery.size() < dimension) {
+        if (rotatedQuery.size() == quantizer.property.genuineDimension ||
+            rotatedQuery.size() + 1 == quantizer.property.genuineDimension) {
+          rotatedQuery.resize(dimension);
+        }
       }
-      if (blobs.empty()) {
-	std::cerr << "Warning: No blobs can be searched." << std::endl;
-	std::cerr << "  global index size=" << globalIndex.getObjectRepositorySize() << std::endl;
-	std::cerr << "  size=" << sc.size << std::endl;
-	return;
+      std::unique_ptr<std::vector<float>> resizedQuery = nullptr;
+      if (parameterExactResultSize > 0) {
+	std::unique_ptr<std::vector<float>> tmp(new std::vector<float>(rotatedQuery));
+	resizedQuery = std::move(tmp);
+      }
+      NGT::Object *query = 0;
+      try {
+	query = allocateObject(rotatedQuery);
+      } catch(NGT::Exception &err) {
+	std::stringstream msg;
+	msg << "search : allocate query for global. dimension=" << searchContainer.objectVector.size()
+	    << " " << err.what();
+	NGTThrowException(msg);
+      }
+      {
+	NGT::SearchContainer gsc(*query);
+	gsc.setResults(&blobs);
+	gsc.setEpsilon(searchContainer.blobExplorationCoefficient - 1.0);
+	gsc.setSize(searchContainer.numOfProbes);
+	globalIndex.search(gsc);
+	if (blobs.empty()) {
+	  std::stringstream msg;
+	  msg << "Error! No blobs can be searched.";
+	  msg << " global index size=" << globalIndex.getObjectRepositorySize();
+	  msg << " size=" << gsc.size << " # of probes=" << searchContainer.numOfProbes;
+	  NGTThrowException(msg);
+	}
       }
-
 #if defined(NGTQG_ROTATION)
       if (quantizedObjectDistance.rotation != 0) {
 	quantizedObjectDistance.rotation->mul(rotatedQuery.data());
       }
 #endif
+      void *selectiveQuery = rotatedQuery.data();
+      NGT::ObjectSpace::ObjectType objectType = NGT::ObjectSpace::ObjectTypeNone;
+      switch(quantizer.property.localClusterDataType) {
+	case NGTQ::ClusterDataTypeSQSU8:
+	  objectType = NGT::ObjectSpace::ObjectType::Qsuint8; break;
+        default: break;
+      }
+      uint8_t scalarQuantizedObject[rotatedQuery.size()];
+      if (objectType != NGT::ObjectSpace::ObjectTypeNone) {
+	auto dimension = rotatedQuery.size();
+	float sqobj[dimension];
+	memcpy(sqobj, rotatedQuery.data(), dimension * sizeof(float));
+	auto offset = getQuantizer().property.scalarQuantizationOffset;
+	auto scale = getQuantizer().property.scalarQuantizationScale;
+	NGT::ObjectSpace::quantizeToQint8(sqobj, dimension, scalarQuantizedObject, objectType, offset, scale);
+	selectiveQuery = scalarQuantizedObject;
+      }
+
       std::unordered_map<size_t, NGTQ::QuantizedObjectDistance::DistanceLookupTableUint8> luts;
       size_t foundCount = 0;
-      size_t k = searchContainer.size;
+      size_t k = parameterSize;
       NGT::Distance radius = FLT_MAX;
       NGT::NeighborhoodGraph::ResultSet result;
 #ifdef NGTQBG_COARSE_BLOB
       NGTQ::QuantizedObjectDistance::DistanceLookupTableUint8 lookupTable;
       quantizedObjectDistance.initialize(lookupTable);
 #endif
+      //NGTQ::BooleanSet *checkedIDs = nullptr;
+      std::unique_ptr<NGTQ::BooleanSet> checkedIDs = nullptr;
+      if (quantizer.objectList.size() < 5000000) {
+	//checkedIDs = new NGTQ::BooleanVector(quantizer.objectList.size());
+	std::unique_ptr<NGTQ::BooleanVector> tmp(new NGTQ::BooleanVector(quantizer.objectList.size()));
+	checkedIDs = std::move(tmp);
+      } else {
+	//checkedIDs = new NGTQ::BooleanHash(quantizer.objectList.size());
+	std::unique_ptr<NGTQ::BooleanHash> tmp(new NGTQ::BooleanHash(quantizer.objectList.size()));
+	checkedIDs = std::move(tmp);
+      }
       for (size_t idx = 0; idx < blobs.size(); idx++) {
 #ifdef NGTQBG_COARSE_BLOB
 	NGT::Distance blobDistance = std::numeric_limits<NGT::Distance>::max();
@@ -1161,7 +1831,8 @@ namespace QBG {
 	auto endIvtID = graphNodeToInvertedIndexEntries[graphNodeID] + 1;
 	for (auto blobID = beginIvtID; blobID < endIvtID; blobID++) {
 	  auto subspaceID = quantizedBlobGraph[blobID].subspaceID;
-	  quantizedObjectDistance.createDistanceLookup(rotatedQuery.data(), subspaceID, lookupTable);
+	  //quantizedObjectDistance.createDistanceLookup(rotatedQuery.data(), subspaceID, lookupTable);
+	  quantizedObjectDistance.createDistanceLookup(selectiveQuery, subspaceID, lookupTable);
 	  NGTQ::QuantizedObjectDistance::DistanceLookupTableUint8 &lut = lookupTable;
 #else
 	{
@@ -1176,9 +1847,8 @@ namespace QBG {
           }
 	  NGTQ::QuantizedObjectDistance::DistanceLookupTableUint8 &lut = (*luti).second;
 #endif
-
 	  NGT::Distance bd;
-	  std::tie(bd, radius) = judge(quantizedBlobGraph[blobID], k, radius, lut, result, foundCount);
+	  std::tie(bd, radius) = judge(quantizedBlobGraph[blobID], k, radius, lut, result, foundCount, selectiveQuery, &checkedIDs);
 #ifdef NGTQBG_COARSE_BLOB
 	  if (bd < blobDistance) {
 	    blobDistance = bd;
@@ -1190,19 +1860,20 @@ namespace QBG {
 #endif
       }
       if (searchContainer.resultIsAvailable()) {
-	if (searchContainer.exactResultSize > 0) {
+	if (parameterExactResultSize > 0) {
 	  NGT::ObjectDistances &qresults = searchContainer.getResult();
-	  refineDistances(searchContainer, quantizer, result, qresults);
+	  refineDistances(quantizer, result, qresults, parameterExactResultSize, resizedQuery);
 	} else {
 	  searchContainer.getResult().moveFrom(result);
 	}
       } else {
-	if (searchContainer.exactResultSize > 0) {
-	  refineDistances(searchContainer, quantizer, result, searchContainer.workingResult);
+	if (parameterExactResultSize > 0) {
+	  refineDistances(quantizer, result, searchContainer.workingResult, parameterExactResultSize, resizedQuery);
 	} else {
 	  searchContainer.workingResult = std::move(result);
 	}
       }
+      deleteObject(query);
     }
 
     void searchInOneStep(QBG::SearchContainer &searchContainer) {
@@ -1233,6 +1904,13 @@ namespace QBG {
 	msg << "The specified index is not now searchable. ";
 	NGTThrowException(msg);
       }
+      auto parameterSize = searchContainer.size;
+      auto parameterExactResultSize = searchContainer.size;
+      if (searchContainer.refinementExpansion >= 1.0) {
+        parameterSize *= searchContainer.refinementExpansion;
+      } else {
+	parameterExactResultSize = 0;
+      }
 
       auto &quantizer = getQuantizer();
       auto &globalIndex = quantizer.globalCodebookIndex;
@@ -1246,7 +1924,7 @@ namespace QBG {
 	searchContainer.explorationCoefficient = NGT_EXPLORATION_COEFFICIENT;
       }
 
-      const auto requestedSize = searchContainer.size;
+      const auto requestedSize = parameterSize;
       searchContainer.size = std::numeric_limits<uint32_t>::max();
 
       // setup edgeSize
@@ -1280,7 +1958,19 @@ namespace QBG {
       size_t explorationSize = 1;
       auto &quantizedObjectDistance = quantizer.getQuantizedObjectDistance();
       std::unordered_map<size_t, NGTQ::QuantizedObjectDistance::DistanceLookupTableUint8> luts;
+      auto dimension = getQuantizer().globalCodebookIndex.getObjectSpace().getDimension();
       std::vector<float> rotatedQuery = searchContainer.objectVector;
+      if (rotatedQuery.size() < dimension) {
+        if (rotatedQuery.size() == quantizer.property.genuineDimension ||
+            rotatedQuery.size() + 1 == quantizer.property.genuineDimension) {
+          rotatedQuery.resize(dimension);
+        }
+      }
+      std::unique_ptr<std::vector<float>> resizedQuery = nullptr;
+      if (parameterExactResultSize > 0) {
+	std::unique_ptr<std::vector<float>> tmp(new std::vector<float>(rotatedQuery));
+	resizedQuery = std::move(tmp);
+      }
       quantizedObjectDistance.rotation->mul(rotatedQuery.data());
       NGT::Distance radius = searchContainer.radius;
       if (requestedSize >= std::numeric_limits<int32_t>::max()) {
@@ -1435,15 +2125,15 @@ namespace QBG {
       }
 
       if (searchContainer.resultIsAvailable()) {
-	if (searchContainer.exactResultSize > 0) {
+	if (parameterExactResultSize > 0) {
 	  NGT::ObjectDistances &qresults = searchContainer.getResult();
-	  refineDistances(searchContainer, quantizer, results, qresults);
+	  refineDistances(quantizer, results, qresults, parameterExactResultSize, resizedQuery);
 	} else {
 	  searchContainer.getResult().moveFrom(results);
 	}
       } else {
-	if (searchContainer.exactResultSize > 0) {
-	  refineDistances(searchContainer, quantizer, results, searchContainer.workingResult);
+	if (parameterExactResultSize > 0) {
+	  refineDistances(quantizer, results, searchContainer.workingResult, parameterExactResultSize, resizedQuery);
 	} else {
 	  searchContainer.workingResult = std::move(results);
 	}
@@ -1505,7 +2195,7 @@ namespace QBG {
     static void build(const std::string &indexPath,
 		      std::vector<std::vector<float>> &quantizerCodebook,
 		      std::vector<uint32_t> &codebookIndex,
-		      std::vector<uint32_t> &objectIndex,
+		      std::vector<std::vector<uint32_t>> &objectIndex,
 		      size_t beginID = 1, size_t endID = 0) {
       buildNGTQ(indexPath, quantizerCodebook, codebookIndex, objectIndex, beginID, endID);
       buildQBG(indexPath);
@@ -1521,7 +2211,7 @@ namespace QBG {
 			  size_t beginID = 1, size_t endID = 0, bool verbose = false) {
       std::vector<std::vector<float>> quantizerCodebook;
       std::vector<uint32_t> codebookIndex;
-      std::vector<uint32_t> objectIndex;
+      std::vector<std::vector<uint32_t>> objectIndex;
       {
 	std::string codebookPath = quantizerCodebookFile;
 	if (codebookPath.empty()) {
@@ -1586,23 +2276,38 @@ namespace QBG {
 	  objectIndexPath = QBG::Index::getObjectIndexFile(indexPath);
 	}
 	if (objectIndexPath != "-") {
-	  std::ifstream stream(objectIndexPath);
-	  if (!stream) {
-	    std::stringstream msg;
-	    msg << "Cannot open the codebook index. " << objectIndexPath;
-	    NGTThrowException(msg);
+	  {
+	    std::ifstream stream(objectIndexPath);
+	    if (!stream) {
+	      std::stringstream msg;
+	      msg << "Cannot open the codebook index. " << objectIndexPath;
+	      NGTThrowException(msg);
+	    }
+	    size_t nOfObjs = 0;
+	    std::string line;
+	    while (getline(stream, line)) nOfObjs++;
+	    objectIndex.resize(nOfObjs);
 	  }
-	  std::string line;
-	  while (getline(stream, line)) {
-	    std::vector<std::string> tokens;
-	    NGT::Common::tokenize(line, tokens, " \t");
-	    std::vector<float> object;
-	    if (tokens.size() != 1) {
+	  {
+	    std::ifstream stream(objectIndexPath);
+	    if (!stream) {
 	      std::stringstream msg;
-	      msg << "The specified object index is invalid. " << line;
+	      msg << "Cannot open the codebook index. " << objectIndexPath;
 	      NGTThrowException(msg);
 	    }
-	    objectIndex.push_back(NGT::Common::strtol(tokens[0]));
+	    std::string line;
+	    size_t idx = 0;
+	    while (getline(stream, line)) {
+	      std::vector<std::string> tokens;
+	      NGT::Common::tokenize(line, tokens, " \t");
+	      if (tokens.size() > 0) {
+		objectIndex[idx].reserve(tokens.size());
+		for (auto &token : tokens) {
+		  objectIndex[idx].emplace_back(NGT::Common::strtol(token));
+		}
+	      }
+	      idx++;
+	    }
 	  }
         }
       }
@@ -1612,7 +2317,7 @@ namespace QBG {
     static void buildNGTQ(const std::string &indexPath,
 			  std::vector<std::vector<float>> &quantizerCodebook,
 			  std::vector<uint32_t> &codebookIndex,
-			  std::vector<uint32_t> &objectIndex,
+			  std::vector<std::vector<uint32_t>> &objectIndex,
 			  size_t beginID = 1, size_t endID = 0, bool verbose = false) {
       NGT::StdOstreamRedirector redirector(!verbose);
       redirector.begin();
@@ -1634,34 +2339,49 @@ namespace QBG {
 	  size_t size = index.getQuantizer().objectList.size();
 	  size = size == 0 ? 0 : size - 1;
 	  objectIndex.resize(size);
+	  for (auto &list : objectIndex) {
+	    list.emplace_back(0);
+	  }
 	}
-	index.createIndex(quantizerCodebook, codebookIndex, objectIndex, beginID, endID);
+	index.createIndex(codebookIndex, objectIndex, beginID, endID);
       }
 
       {
+	{ // keep a copy of the blob file under the stored-blob name before the workspace clean-up below
+	  const std::string comcp = "cp -f " + QBG::Index::getBlobFile(indexPath) + " " + QBG::Index::getStoredBlobFile(indexPath);
+	  if (system(comcp.c_str()) == -1) { // -1 only reports that the command could not be launched, not that cp itself failed
+	    std::cerr << "Warning. cannot copy the blob. "
+		      << comcp << std::endl;
+	  }
+	}
 	char *s = getenv("NGT_NOT_REMOVE_WORKSPACE");
 	if (s == 0) {
-	  const string comrmdir = "rm -rf " + indexPath + "/" + getWorkspaceName();
+	  const std::string comrmdir = "rm -rf " + indexPath + "/" + getWorkspaceName();
 	  if (system(comrmdir.c_str()) == -1) {
 	    std::cerr << "Warning. cannot remove the workspace directory. "
 		      << comrmdir << std::endl;
 	  }
 	}
-	const string comrm = "rm -f " + indexPath + "/" + NGTQ::Quantizer::getInvertedIndexFile();
-	if (system(comrm.c_str()) == -1) {
-	  std::cerr << "Warning. cannot remove the indeverted index. "
-		    << comrm << std::endl;
+	{
+	  const std::string comrm = "rm -f " + indexPath + "/" + NGTQ::Quantizer::getInvertedIndexFile();
+	  if (system(comrm.c_str()) == -1) {
+	    std::cerr << "Warning. cannot remove the inverted index. "
+		      << comrm << std::endl;
+	  }
 	}
       }
 
       timer.stop();
+      index.save();
+
+      QBG::Optimizer::extractScaleAndOffset(indexPath, -1.0, -1, verbose);
+
+      redirector.end();
       std::cerr << "NGTQ index is completed." << std::endl;
       std::cerr << "  time=" << timer << std::endl;
       std::cerr << "  vmsize=" << NGT::Common::getProcessVmSizeStr() << std::endl;
       std::cerr << "  peak vmsize=" << NGT::Common::getProcessVmPeakStr() << std::endl;
       std::cerr << "saving..." << std::endl;
-      index.save();
-      redirector.end();
     }
 
     static void buildQBG(const std::string &indexPath, bool verbose = false) {
@@ -1814,6 +2534,8 @@ namespace QBG {
       assert(threadSize != 0);
 
       size_t dataSize = 0;
+      NGTQ::Property property;
+      property.load(indexPath);
       {
 	const char *ngtDirString = "/tmp/ngt-XXXXXX";
 	char ngtDir[strlen(ngtDirString) + 1];
@@ -1826,9 +2548,8 @@ namespace QBG {
 	  msg << "Error! moving is failed. " << mvcom;
 	  NGTThrowException(msg);
 	}
-
-	NGT::Index::append(tmpDir + "/" + NGTQ::Quantizer::getGlobalFile(), blobs, threadSize, dataSize);
-
+	NGT::Index::appendFromTextObjectFile(tmpDir + "/" + NGTQ::Quantizer::getGlobalFile(),
+					     blobs, dataSize);
 	auto unlog = false;
 	NGT::GraphOptimizer graphOptimizer(unlog);
 	graphOptimizer.searchParameterOptimization = false;
@@ -1852,9 +2573,11 @@ namespace QBG {
 	  std::cerr << "Warning. remove is failed. " << rmcom << std::endl;
 	}
       }
-      NGTQ::Property property;
-      property.load(indexPath);
 
+      if (property.centroidCreationMode != NGTQ::CentroidCreationModeStaticLayer &&
+	  property.centroidCreationMode != NGTQ::CentroidCreationModeStatic) {
+	std::cerr << "Warning. Inspite of not static mode, load the local codebook." << std::endl;
+      }
       std::vector<std::string> tokens;
       NGT::Common::tokenize(localCodebooks, tokens, "@");
       if (tokens.size() != 2) {
@@ -1868,7 +2591,8 @@ namespace QBG {
 	std::cerr << data.str() << "->" << localCodebook.str() << std::endl;
 	NGT::Index::append(localCodebook.str(), data.str(), threadSize, dataSize);
       }
-
+      property.localCodebookState = true;
+      property.save(indexPath);
 #ifdef NGTQ_QBG
       std::vector<std::vector<float>> qCodebook;
       {
@@ -1921,6 +2645,17 @@ namespace QBG {
       redirector.end();
     }
 
+    static void setupObjects(std::string indexPath, size_t nOfObjects, bool verbose) { // distance-type specific object preparation
+      NGTQ::Property prop;
+      prop.load(indexPath);
+      const auto distanceType = prop.distanceType;
+      if (distanceType == NGTQ::DistanceType::DistanceTypeInnerProduct) {
+	Optimizer::convertObjectsFromInnerProductToL2(indexPath, nOfObjects, verbose);
+      } else if (distanceType == NGTQ::DistanceType::DistanceTypeNormalizedCosine) {
+	Optimizer::normalizeObjectsForCosine(indexPath, nOfObjects, verbose);
+      }
+    }
+
     static const std::string getSubvectorPrefix() { return "sv"; }
     static const std::string getHierarchicalClusteringPrefix() { return "hkc"; }
     static const std::string getSecondCentroidSuffix() { return "_2c"; }
@@ -1946,6 +2681,7 @@ namespace QBG {
     static const std::string getObjectIndexFile(std::string indexPath) { return getPrefix(indexPath) + getObjTo3rdSuffix(); }
     static const std::string getRotationFile(std::string indexPath) { return getPQFile(indexPath) + "/" + getRotationFile
 (); }
+    static const std::string getStoredBlobFile(std::string indexPath) { return indexPath + "/blbc"; } // destination of the blob copy made by buildNGTQ
 
     static const std::string getWorkspaceName() { return "ws"; }
     const std::string path;
diff --git a/lib/NGT/NGTQ/QuantizedGraph.h b/lib/NGT/NGTQ/QuantizedGraph.h
index 8a30ceb..f7bac30 100644
--- a/lib/NGT/NGTQ/QuantizedGraph.h
+++ b/lib/NGT/NGTQ/QuantizedGraph.h
@@ -66,7 +66,9 @@ namespace NGTQG {
   class QuantizedGraphRepository : public std::vector<QuantizedNode> {
     typedef std::vector<QuantizedNode> PARENT;
   public:
-    QuantizedGraphRepository(NGTQ::Index &quantizedIndex): numOfSubspaces(quantizedIndex.getQuantizer().property.localDivisionNo) {}
+    QuantizedGraphRepository(NGTQ::Index &quantizedIndex) :
+      quantizer(quantizedIndex.getQuantizer()),
+      numOfSubspaces(quantizedIndex.getQuantizer().property.localDivisionNo) {}
     ~QuantizedGraphRepository() {}
 
     void *get(size_t id) {
@@ -86,6 +88,7 @@ namespace NGTQG {
     void construct(NGT::GraphRepository &graphRepository, NGTQ::Index &quantizedIndex, size_t maxNoOfEdges) {
       NGTQ::InvertedIndexEntry<uint16_t> invertedIndexObjects(numOfSubspaces);
       quantizedIndex.getQuantizer().extractInvertedIndexObject(invertedIndexObjects);
+      std::cerr << "inverted index object size=" << invertedIndexObjects.size() << std::endl;
       quantizedIndex.getQuantizer().eraseInvertedIndexObject();
 
 
@@ -101,6 +104,7 @@ namespace NGTQG {
 	size_t numOfEdges = node.size() < maxNoOfEdges ? node.size() : maxNoOfEdges;
 	(*this)[id].ids.reserve(numOfEdges);
 	NGTQ::QuantizedObjectProcessingStream quantizedStream(quantizedIndex.getQuantizer().divisionNo, numOfEdges);
+	std::cerr << "pass XX " << node.size() << ":" << invertedIndexObjects.size() << std::endl;
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
 	for (auto i = node.begin(graphRepository.allocator); i != node.end(graphRepository.allocator); ++i) {
 	  if (distance(node.begin(graphRepository.allocator), i) >= static_cast<int64_t>(numOfEdges)) {
@@ -140,7 +144,10 @@ namespace NGTQG {
     }
 
     void serialize(std::ofstream &os, NGT::ObjectSpace *objspace = 0) {
+#ifdef NGT_IVI
+#else
       NGTQ::QuantizedObjectProcessingStream quantizedObjectProcessingStream(numOfSubspaces);
+#endif
       uint64_t n = numOfSubspaces;
       NGT::Serializer::write(os, n);
       n = PARENT::size();
@@ -149,14 +156,21 @@ namespace NGTQG {
         uint32_t sid = (*i).subspaceID;
         NGT::Serializer::write(os, sid);
 	NGT::Serializer::write(os, (*i).ids);
+#ifdef NGT_IVI
+	size_t streamSize = quantizer.getQuantizedObjectDistance().getSizeOfCluster((*i).ids.size());
+#else
 	size_t streamSize = quantizedObjectProcessingStream.getUint4StreamSize((*i).ids.size());
+#endif
 	NGT::Serializer::write(os, static_cast<uint8_t*>((*i).objects), streamSize);
       }
     }
 
     void deserialize(std::ifstream &is, NGT::ObjectSpace *objectspace = 0) {
       try {
+#ifdef NGT_IVI
+#else
 	NGTQ::QuantizedObjectProcessingStream quantizedObjectProcessingStream(numOfSubspaces);
+#endif
 	uint64_t n;
 	NGT::Serializer::read(is, n);
 	numOfSubspaces = n;
@@ -167,7 +181,11 @@ namespace NGTQG {
 	  NGT::Serializer::read(is, sid);
 	  (*i).subspaceID = sid;
 	  NGT::Serializer::read(is, (*i).ids);
+#ifdef NGT_IVI
+	  size_t streamSize = quantizer.getQuantizedObjectDistance().getSizeOfCluster((*i).ids.size());
+#else
           size_t streamSize = quantizedObjectProcessingStream.getUint4StreamSize((*i).ids.size());
+#endif
 	  uint8_t *objectStream = new uint8_t[streamSize];
 	  NGT::Serializer::read(is, objectStream, streamSize);
 	  (*i).objects = objectStream;
@@ -200,6 +218,7 @@ namespace NGTQG {
       deserialize(is);
     }
 
+    NGTQ::Quantizer &quantizer;
     size_t numOfSubspaces;
   };
 
@@ -451,7 +470,7 @@ namespace NGTQG {
       }
       if (dimension % dimensionOfSubvector != 0) {
 	stringstream msg;
-	msg << "Quantizer::getNumOfSubvectors: dimensionOfSubvector is invalid. " << dimension << " : " << dimensionOfSubvector << std::endl;
+	msg << "Quantizer::getNumOfSubvectors: dimensionOfSubvector is invalid. " << dimension << " : " << dimensionOfSubvector;
 	NGTThrowException(msg);
       }
       return dimension / dimensionOfSubvector;
@@ -465,8 +484,9 @@ namespace NGTQG {
 	struct stat st;
 	std::string qgGraphPath(qgPath + "/grp");
 	if (stat(qgGraphPath.c_str(), &st) == 0) {
-	  std::cerr << "already exists" << std::endl;
-	  abort();
+	  stringstream msg;
+	  msg << "Already exists. " << qgGraphPath;
+	  NGTThrowException(msg);
 	} else {
 	  NGT::GraphRepository graph;
 	  NGT::GraphIndex::loadGraph(indexPath, graph);
diff --git a/lib/NGT/NGTQ/Quantizer.h b/lib/NGT/NGTQ/Quantizer.h
index 4e925e7..aee5fd8 100644
--- a/lib/NGT/NGTQ/Quantizer.h
+++ b/lib/NGT/NGTQ/Quantizer.h
@@ -21,8 +21,11 @@
 #include	"NGT/Clustering.h"
 #include	<unordered_map>
 #include	"NGT/NGTQ/ObjectFile.h"
+#include	"NGT/HashBasedBooleanSet.h"
 
 
+#define		NGT_IVI
+
 #if defined(NGT_SHARED_MEMORY_ALLOCATOR) || defined(NGT_QBG_DISABLED)
 #undef NGTQ_QBG
 #else
@@ -46,13 +49,13 @@
 #define NGTQ_OBJECT_IN_MEMORY
 
 #define NGTQ_UINT8_LUT
-#define NGTQ_SIMD_BLOCK_SIZE	16
-#define NGTQ_BATCH_SIZE		2
+#define NGTQ_SIMD_BLOCK_SIZE	16	
+#define NGTQ_BATCH_SIZE		2	
 #define NGTQ_UINT4_OBJECT
 #define NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION
 #define NGTQG_PREFETCH
 #if defined(NGT_AVX512)
-#define NGTQG_AVX512
+#define NGTQG_AVX512	
 #warning "AVX512 is available for NGTQG"
 #elif defined(NGT_AVX2)
 #define NGTQG_AVX2
@@ -66,7 +69,7 @@
 
 
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
-#define	NGTQ_SHARED_INVERTED_INDEX
+#define	NGTQ_SHARED_INVERTED_INDEX	
 #endif
 
 extern "C" {
@@ -77,9 +80,35 @@ extern "C" {
 
 namespace NGTQ {
 
+class BooleanSet { // abstract interface for a set of boolean flags addressed by index
+ public: // NOTE(review): no virtual destructor here although both implementations declare one - confirm nothing deletes through BooleanSet*
+  virtual bool get(size_t idx) = 0; // query the flag at idx
+  virtual void set(size_t idx) = 0; // raise the flag at idx
+  virtual bool operator[](size_t idx) = 0; // query the flag at idx (subscript form)
+};
+
+class BooleanVector : public std::vector<bool>, public BooleanSet { // BooleanSet backed by a std::vector<bool>
+ public:
+  BooleanVector(size_t s): std::vector<bool>(s, false){} // s flags, all initially false
+  virtual ~BooleanVector() {}
+  bool get(size_t idx) { return std::vector<bool>::operator[](idx); } // read the vector directly; calling BooleanVector::get here recursed forever
+  void set(size_t idx) { std::vector<bool>::operator[](idx) = true; } // no bounds check: idx must be below the size given at construction
+  bool operator[](size_t idx) { return std::vector<bool>::operator[](idx); } // same result as get()
+};
+
+class BooleanHash : public HashBasedBooleanSet<uint32_t>, public BooleanSet { // BooleanSet backed by HashBasedBooleanSet<uint32_t>
+ public:
+  BooleanHash(size_t s): HashBasedBooleanSet<uint32_t>(s){} // s is forwarded unchanged to the hash-based set
+  virtual ~BooleanHash() {}
+  bool get(size_t idx) { return HashBasedBooleanSet<uint32_t>::operator[](idx); } // delegate to the base lookup; calling BooleanHash::get here recursed forever
+  void set(size_t idx) { HashBasedBooleanSet<uint32_t>::set(idx); }
+  bool operator[](size_t idx) { return HashBasedBooleanSet<uint32_t>::operator[](idx); } // same result as get()
+};
+
 class Rotation : public std::vector<float> {
   typedef std::vector<float> PARENT;
  public:
+  Rotation():dim(0) {}
   Rotation& operator=(const std::vector<float> &r) {
     PARENT::operator=(r);
     dim = sqrt(PARENT::size());
@@ -167,9 +196,7 @@ class Rotation : public std::vector<float> {
       std::cerr << "rotation::deserialize: Fatal inner error. Invalid data. " << dim << ":" << dim * dim << ":" << v << std::endl;
       abort();
     }
-
     is.read(reinterpret_cast<char*>(PARENT::data()), PARENT::size() * sizeof(float));
-
   }
 
   bool isIdentity() {
@@ -466,8 +493,8 @@ class InvertedIndexObject {
       localID[i] = 0;
     }
   }
-  uint32_t	id;
-  T		localID[1];
+  uint32_t	id;		
+  T		localID[1];	
 };
 
 template <typename T>
@@ -672,7 +699,7 @@ class LocalDatam {
 #else
   LocalDatam(size_t iii, size_t iil) : iiIdx(iii), iiLocalIdx(iil) {}
 #endif
-  size_t iiIdx;
+  size_t iiIdx;	
   size_t iiLocalIdx;
 #ifdef NGTQ_QBG
   uint32_t subspaceID;
@@ -686,12 +713,13 @@ class SerializableObject : public NGT::Object {
 };
 
  enum DataType {
-   DataTypeUint8 = 0,
-   DataTypeFloat = 1
+   DataTypeUint8 = ObjectFile::DataTypeUint8,
+   DataTypeFloat = ObjectFile::DataTypeFloat,
 #ifdef NGT_HALF_FLOAT
-   ,
-   DataTypeFloat16 = 2
+   DataTypeFloat16 = ObjectFile::DataTypeFloat16,
 #endif
+   DataTypeNone = ObjectFile::DataTypeNone,
+   DataTypeAny = 100	
  };
 
  typedef NGT::ObjectSpace::DistanceType		DistanceType;
@@ -701,7 +729,7 @@ class SerializableObject : public NGT::Object {
    CentroidCreationModeStatic		= 1,
    CentroidCreationModeDynamicKmeans	= 2,
    CentroidCreationModeStaticLayer	= 3,
-   CentroidCreationModeNone		= 9
+   CentroidCreationModeNone		= 9	
  };
 
  enum AggregationMode {
@@ -717,6 +745,15 @@ class SerializableObject : public NGT::Object {
    QuantizerTypeQG	= 1,
    QuantizerTypeQBG	= 2
  };
+
+ enum ClusterDataType { // written to and read from the "LocalClusterDataType" property as a long, so the numeric values must stay stable
+   ClusterDataTypeNone	= 0,
+   ClusterDataTypeNQ	= 1,
+   ClusterDataTypePQ4	= 2, // initial value assigned to localClusterDataType in this file
+   ClusterDataTypeSQSU8	= 4,
+   ClusterDataTypeSQU7T	= 7,
+   ClusterDataTypeSQS8T	= 9 // NOTE(review): the meaning of the individual members is not visible in this hunk - document at the point of use
+ };
  
  class Property {
  public:
@@ -742,14 +779,20 @@ class SerializableObject : public NGT::Object {
     localCentroidCreationMode = CentroidCreationModeDynamic;
     localIDByteSize	= 0;		// finally decided by localCentroidLimit
     localCodebookState	= false;	// not completed
-    localClusteringSampleCoefficient = 10;
+    localClusteringSampleCoefficient = 10;	
     quantizerType	= QuantizerTypeNone;
 #ifdef NGTQ_OBJECT_IN_MEMORY
-    objectListOnMemory	= false;
+    refinementDataType = DataTypeNone;
 #endif
+    localClusterDataType = NGTQ::ClusterDataTypePQ4;
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
     invertedIndexSharedMemorySize = 512; // MB
 #endif
+    scalarQuantizationScale = 0.0;
+    scalarQuantizationOffset = 0.0;
+    scalarQuantizationClippingRate = 0.01;
+    scalarQuantizationNoOfSamples = 0;
+    maxMagnitude = -1.0;
   }
 
   void save(const string &path) {
@@ -772,16 +815,22 @@ class SerializableObject : public NGT::Object {
     prop.set("BatchSize", 	(long)batchSize);
     prop.set("CentroidCreationMode", (long)centroidCreationMode);
     prop.set("LocalCentroidCreationMode", (long)localCentroidCreationMode);
-    prop.set("LocalIDByteSize",	(long)localIDByteSize);
+    prop.set("LocalIDByteSize",	(long)localIDByteSize);	
     prop.set("LocalCodebookState", (long)localCodebookState);
     prop.set("LocalSampleCoefficient", (long)localClusteringSampleCoefficient);
     prop.set("QuantizerType",	(long)quantizerType);
 #ifdef NGTQ_OBJECT_IN_MEMORY
-    prop.set("ObjectListOnMemory",	(long)objectListOnMemory);
+    prop.set("RefinementDataType",	(long)refinementDataType);
 #endif
+    prop.set("LocalClusterDataType",	(long)localClusterDataType);
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
     prop.set("InvertedIndexSharedMemorySize", 	(long)invertedIndexSharedMemorySize);
 #endif
+    prop.set("ScalarQuantizationScale", scalarQuantizationScale);
+    prop.set("ScalarQuantizationOffset", scalarQuantizationOffset);
+    prop.set("ScalarQuantizationClippingRate", scalarQuantizationClippingRate);
+    prop.set("ScalarQuantizationNoOfSamples", scalarQuantizationNoOfSamples);
+    prop.set("MaxMagnitude", maxMagnitude);
     prop.save(path + "/prf");
   }
 
@@ -835,16 +884,22 @@ class SerializableObject : public NGT::Object {
     setupLocalIDByteSize();
     quantizerType	= (QuantizerType)prop.getl("QuantizerType", quantizerType);
 #ifdef NGTQ_OBJECT_IN_MEMORY
-    objectListOnMemory	= prop.getl("ObjectListOnMemory", objectListOnMemory);
+    refinementDataType	= (DataType)prop.getl("RefinementDataType", refinementDataType);
 #endif
+    localClusterDataType = (ClusterDataType)prop.getl("LocalClusterDataType", localClusterDataType);
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
     invertedIndexSharedMemorySize
       = prop.getl("InvertedIndexSharedMemorySize", invertedIndexSharedMemorySize);
 #endif
+    scalarQuantizationScale	= prop.getf("ScalarQuantizationScale", scalarQuantizationScale);
+    scalarQuantizationOffset	= prop.getf("ScalarQuantizationOffset", scalarQuantizationOffset);
+    scalarQuantizationClippingRate	= prop.getf("ScalarQuantizationClippingRate", scalarQuantizationClippingRate);
+    scalarQuantizationNoOfSamples	= prop.getl("ScalarQuantizationNoOfSamples", scalarQuantizationNoOfSamples);
+    maxMagnitude	= prop.getf("MaxMagnitude", maxMagnitude);
   }
 
-  void setup(const Property &p) {
-    *this = p;
+  size_t getDataSize() {
+    size_t dataSize = 0;
 #ifdef NGTQ_QBG
     switch (genuineDataType) {
 #else
@@ -877,8 +932,21 @@ class SerializableObject : public NGT::Object {
       NGTThrowException("Quantizer constructor: Inner error. Invalid data type.");
       break;
     }
+    return dataSize;
+  }
+
+  void setup(const Property &p) { // adopt p, then recompute the fields derived from it
+    *this = p; // copy every field before the derived values below are refreshed
     setupLocalIDByteSize();
     localDivisionNo = getLocalCodebookNo();
+#ifdef NGTQ_QBG
+    if (dimension == 0) { // no explicit dimension: fall back to the genuine one
+      dimension = genuineDimension;
+    }
+    if (dimension % 4 != 0) { // round the dimension up to the next multiple of 4
+      dimension = ((dimension - 1) / 4 + 1) * 4;
+    }
+#endif
   }
 
   inline size_t getLocalCodebookNo() { return singleLocalCodebook ? 1 : localDivisionNo; }
@@ -906,11 +974,17 @@ class SerializableObject : public NGT::Object {
   size_t	localClusteringSampleCoefficient;
   QuantizerType	quantizerType;
 #ifdef NGTQ_OBJECT_IN_MEMORY
-  bool		objectListOnMemory;
+  DataType	refinementDataType;
 #endif
+  ClusterDataType localClusterDataType;
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
   size_t	invertedIndexSharedMemorySize;
 #endif
+  float		scalarQuantizationScale;
+  float		scalarQuantizationOffset;
+  float		scalarQuantizationClippingRate;
+  size_t	scalarQuantizationNoOfSamples;
+  float		maxMagnitude;
 };
 
 #ifdef NGTQ_DISTANCE_ANGLE
@@ -924,230 +998,599 @@ class SerializableObject : public NGT::Object {
  };
 #endif
 
-class QuantizedObjectDistance {
-public:
-  class DistanceLookupTable {
-  public:
-    DistanceLookupTable():localDistanceLookup(0) {}
-    ~DistanceLookupTable() {
-      if (localDistanceLookup != 0) {
-	delete[] localDistanceLookup;
-	localDistanceLookup = 0;
-      }
-    }
-    bool isValid(size_t idx) {
-#ifdef NGTQ_QBG
-      std::cerr << "isValid() is not implemented" << std::endl;
-      abort();
-#else
-      return flag[idx];
-#endif
-    }
-#ifndef NGTQ_DISTANCE_ANGLE
-    void set(size_t idx, double d) {
-#ifndef NGTQ_QBG
-      flag[idx] = true;
-#endif
-      localDistanceLookup[idx] = d;
-    }
-    double getDistance(size_t idx) { return localDistanceLookup[idx]; }
-#endif
-    void initialize(size_t s) {
-      size = s;
-#ifdef NGTQ_DISTANCE_ANGLE
-      localDistanceLookup = new LocalDistanceLookup[size];
-#else
-      localDistanceLookup = new float[size];
-#endif
-#ifndef NGTQ_QBG
-      flag.resize(size, false);
-#endif
-    }
-#ifdef NGTQ_DISTANCE_ANGLE
-    LocalDistanceLookup	*localDistanceLookup;
-#else
-    float		*localDistanceLookup;
-#endif
-    size_t		size;
-#ifndef NGTQ_QBG
-    vector<bool>	flag;
-#endif
-  };
-
-  class DistanceLookupTableUint8 {
-  public:
-    DistanceLookupTableUint8():localDistanceLookup(0) {}
-    ~DistanceLookupTableUint8() {
-      if (localDistanceLookup != 0) {
-	delete[] localDistanceLookup;
-	localDistanceLookup = 0;
-	delete[] scales;
-	delete[] offsets;
-      }
-    }
-    void initialize(size_t numOfSubspaces, size_t localCodebookCentroidNo) {
-      size_t numOfAlignedSubvectors = ((numOfSubspaces - 1) / NGTQ_BATCH_SIZE + 1) * NGTQ_BATCH_SIZE;
-      size = numOfAlignedSubvectors * localCodebookCentroidNo;
-      localDistanceLookup = new uint8_t[size];
-      scales = new float[numOfAlignedSubvectors];
-      offsets = new float[numOfAlignedSubvectors];
-      range512 = (numOfSubspaces >> 2) * step512;
-      range256 = (((numOfSubspaces - 1) >> 1) + 1) * step256;
-    }
-
-    uint8_t		*localDistanceLookup;
-    size_t		size;
-    size_t		aslignedNumOfSubspaces;
-    size_t		localCodebookCentroidNo;
-    float		*scales;
-    float		*offsets;
-    float		totalOffset;
-    size_t		range512;
-    size_t		range256;
-    static constexpr size_t		step512 = 32;
-    static constexpr size_t		step256 = 16;
-  };
 
-  QuantizedObjectDistance(){}
-  virtual ~QuantizedObjectDistance() {
-    delete[] localCentroids;
-    delete[] localCentroidsForSIMD;
+class QuantizedObjectProcessingStream {
+ public:
+  QuantizedObjectProcessingStream(size_t numOfSubspaces, size_t nOfObjects) {
+     initialize(numOfSubspaces);
+     numOfObjects = nOfObjects;
+     setStreamSize();
+     stream = new uint8_t[streamSize]();
   }
 
-  virtual double operator()(NGT::Object &object, size_t objectID, void *localID) = 0;
+  QuantizedObjectProcessingStream(size_t numOfSubspaces): stream(0) {
+    initialize(numOfSubspaces);
+  }
 
-  virtual double operator()(void *localID, DistanceLookupTable &distanceLUT) = 0;
+  ~QuantizedObjectProcessingStream() {
+    delete[] stream;
+  }
 
-#ifdef NGTQBG_MIN
-  virtual float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT) = 0;
-#else
-  virtual void operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT) = 0;
-#endif
-  virtual double operator()(NGT::Object &object, size_t objectID, void *localID, DistanceLookupTable &distanceLUT) = 0;
+  void initialize(size_t divisionNo) { // derive the padded subvector count and the per-block stride
+    numOfAlignedSubvectors = ((divisionNo - 1) / NGTQ_BATCH_SIZE + 1) * NGTQ_BATCH_SIZE; // divisionNo rounded up to a multiple of NGTQ_BATCH_SIZE
+    alignedBlockSize = NGTQ_SIMD_BLOCK_SIZE * numOfAlignedSubvectors; // stream bytes occupied by one block of NGTQ_SIMD_BLOCK_SIZE objects
+  }
 
-  template <typename T>
-  inline double getAngleDistanceUint8(NGT::Object &object, size_t objectID, T localID[]) {
-    assert(globalCodebookIndex != 0);
-    NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID);
-    size_t sizeOfObject = globalCodebookIndex->getObjectSpace().getByteSizeOfObject();
-    size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(uint8_t);
-#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
-    unsigned char *gcptr = &gcentroid.at(0, globalCodebookIndex->getObjectSpace().getRepository().allocator);
-#else
-    unsigned char *gcptr = &gcentroid[0];
-#endif
-    unsigned char *optr = &((NGT::Object&)object)[0];
-    double normA = 0.0F;
-    double normB = 0.0F;
-    double sum = 0.0F;
-    for (size_t li = 0; li < localDivisionNo; li++) {
-      size_t idx = localCodebookNo == 1 ? 0 : li;
-      NGT::PersistentObject &lcentroid = *localCodebookIndexes[idx].getObjectSpace().getRepository().get(localID[li]);
+  static size_t getNumOfAlignedObjects(size_t numOfObjects) { // object count rounded up to a whole number of SIMD blocks
+    return (((numOfObjects - 1) / NGTQ_SIMD_BLOCK_SIZE + 1) * NGTQ_SIMD_BLOCK_SIZE); // for 0 the unsigned subtraction wraps around
+  }
+  
+  void setStreamSize() { // recompute numOfAlignedObjects and streamSize from the current numOfObjects
+    numOfAlignedObjects  = getNumOfAlignedObjects(numOfObjects);
+    streamSize = numOfAlignedObjects * numOfAlignedSubvectors; // one byte per (padded object, padded subvector) pair
+    return;
+  }
+#ifdef NGTQ_QBG
+  void arrangeQuantizedObject(size_t dataNo, size_t subvectorNo, uint8_t quantizedObject) {
 #if defined(NGT_SHARED_MEMORY_ALLOCATOR)
-      float *lcptr = (float*)&lcentroid.at(0, localCodebookIndexes[idx].getObjectSpace().getRepository().allocator);
+    abort();
 #else
-      float *lcptr = (float*)&lcentroid[0];
+    size_t blkNo = dataNo / NGTQ_SIMD_BLOCK_SIZE;	
+    size_t oft = dataNo - blkNo * NGTQ_SIMD_BLOCK_SIZE;	
+    stream[blkNo * alignedBlockSize + NGTQ_SIMD_BLOCK_SIZE * subvectorNo + oft] = quantizedObject;
 #endif
-      float *lcendptr = lcptr + localDataSize;
-      while (lcptr != lcendptr) {
-	double a = *optr++;
-	double b = *gcptr++ + *lcptr++;
-	normA += a * a;
-	normB += b * b;
-	sum += a * b;
-      }
-    }
-    double cosine = sum / (sqrt(normA) * sqrt(normB));
-    if (cosine >= 1.0F) {
-      return 0.0F;
-    } else if (cosine <= -1.0F) {
-      return acos(-1.0F);
-    }
-    return acos(cosine);
   }
 
-#if defined(NGT_NO_AVX)
-  template <typename T>
-  inline double getL2DistanceUint8(NGT::Object &object, size_t objectID, T localID[]) {
-    assert(globalCodebookIndex != 0);
-    NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID);
-    size_t sizeOfObject = globalCodebookIndex->getObjectSpace().getByteSizeOfObject();
-    size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(uint8_t);
-#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
-    unsigned char *gcptr = &gcentroid.at(0, globalCodebookIndex->getObjectSpace().getRepository().allocator);
-#else
-    unsigned char *gcptr = &gcentroid[0];
-#endif
-    unsigned char *optr = &((NGT::Object&)object)[0];
-    double distance = 0.0;
-    for (size_t li = 0; li < localDivisionNo; li++) {
-      size_t idx = localCodebookNo == 1 ? 0 : li;
-      NGT::PersistentObject &lcentroid = *localCodebookIndexes[idx].getObjectSpace().getRepository().get(localID[li]);
+  void arrange(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects) {
+    for (size_t oidx = 0; oidx < invertedIndexObjects.size(); oidx++) {
+      for (size_t idx = 0; idx < invertedIndexObjects.numOfSubvectors; idx++) {
+#ifdef NGTQ_UINT8_LUT
+#ifdef NGTQ_SIMD_BLOCK_SIZE
 #if defined(NGT_SHARED_MEMORY_ALLOCATOR)
-      float *lcptr = (float*)&lcentroid.at(0, localCodebookIndexes[idx].getObjectSpace().getRepository().allocator);
+	abort();
 #else
-      float *lcptr = (float*)&lcentroid[0];
+	arrangeQuantizedObject(oidx, idx, invertedIndexObjects[oidx].localID[idx] - 1);
 #endif
-      double d = 0.0;
-      float *lcendptr = lcptr + localDataSize;
-      while (lcptr != lcendptr) {
-	double sub = ((int)*optr++ - (int)*gcptr++) - *lcptr++;
-	d += sub * sub;
-      }
-      distance += d;
-    }
-    return sqrt(distance);
-  }
-#else
-  template <typename T>
-  inline double getL2DistanceUint8(NGT::Object &object, size_t objectID, T localID[]) {
-    assert(globalCodebookIndex != 0);
-    NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID);
-    size_t sizeOfObject = globalCodebookIndex->getObjectSpace().getByteSizeOfObject();
-    size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(uint8_t);
-#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
-    unsigned char *gcptr = &gcentroid.at(0, globalCodebookIndex->getObjectSpace().getRepository().allocator);
 #else
-    unsigned char *gcptr = &gcentroid[0];
+	objectData[idx * noobjs + oidx] = invertedIndexObjects[oidx].localID[idx] - 1;
 #endif
-    unsigned char *optr = &((NGT::Object&)object)[0];
-    double distance = 0.0;
-    for (size_t li = 0; li < localDivisionNo; li++) {
-      size_t idx = localCodebookNo == 1 ? 0 : li;
-      NGT::PersistentObject &lcentroid = *localCodebookIndexes[idx].getObjectSpace().getRepository().get(localID[li]);
-#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
-      float *lcptr = (float*)&lcentroid.at(0, localCodebookIndexes[idx].getObjectSpace().getRepository().allocator);
 #else
-      float *lcptr = (float*)&lcentroid[0];
+	objectData[idx * noobjs + oidx] = invertedIndexObjects[oidx].localID[idx];
 #endif
-
-      float *lcendptr = lcptr + localDataSize - 3;
-      __m128 sum = _mm_setzero_ps();
-      while (lcptr < lcendptr) {
-	__m128i x1 = _mm_cvtepu8_epi32(_mm_loadu_si128((__m128i const*)optr));
-	__m128i x2 = _mm_cvtepu8_epi32(_mm_loadu_si128((__m128i const*)gcptr));
-	x1 = _mm_sub_epi32(x1, x2);
-	__m128 sub = _mm_sub_ps(_mm_cvtepi32_ps(x1), _mm_loadu_ps(lcptr));
-	sum = _mm_add_ps(sum, _mm_mul_ps(sub, sub));
-	optr += 4;
-	gcptr += 4;
-	lcptr += 4;
-      }
-      __attribute__((aligned(32))) float f[4];
-      _mm_store_ps(f, sum);
-      double d = f[0] + f[1] + f[2] + f[3];
-      while (lcptr < lcendptr) {
-	double sub = ((int)*optr++ - (int)*gcptr++) - *lcptr++;
-	d += sub * sub;
       }
-      distance += d;
     }
-    distance = sqrt(distance);
-    return distance;
+  }
+
+  uint8_t getQuantizedObject(size_t dataNo, size_t subvectorNo) {
+    size_t blkNo = dataNo / NGTQ_SIMD_BLOCK_SIZE;	
+    size_t oft = dataNo - blkNo * NGTQ_SIMD_BLOCK_SIZE;	
+    return stream[blkNo * alignedBlockSize + NGTQ_SIMD_BLOCK_SIZE * subvectorNo + oft];
   }
 #endif
 
-  template <typename T>
+  uint8_t* compressIntoUint4() { // pack the byte stream two codes per byte; returns a new[] buffer the caller must delete[]
+    size_t idx = 0;
+    size_t uint4StreamSize = streamSize / 2;
+    uint8_t *uint4Objects = new uint8_t[uint4StreamSize](); // zero-initialized so the |= below starts from a clean byte
+    while (idx < streamSize) {
+      for (size_t lidx = 0; lidx < numOfAlignedSubvectors; lidx++) {
+	for (size_t bidx = 0; bidx < NGTQ_SIMD_BLOCK_SIZE; bidx++) {
+	  if (idx / 2 >= uint4StreamSize) { // >= (not >): index uint4StreamSize is already one past the end of the buffer
+	    std::stringstream msg;
+	    msg << "Quantizer::compressIntoUint4: Fatal inner error! " << (idx / 2) << ":" << uint4StreamSize;
+	    NGTThrowException(msg);
+	  }
+	  if (idx % 2 == 0) { // even position -> low nibble
+	    uint4Objects[idx / 2] = stream[idx];
+	  } else { // odd position -> high nibble
+	    uint4Objects[idx / 2] |= (stream[idx] << 4);
+	  }
+	  idx++;
+	}
+      }
+    }
+    return uint4Objects;
+  }
+
+  void uncompressFromUint4(uint8_t *uint4Objects) { // inverse of compressIntoUint4: expand each packed byte into two stream bytes
+    size_t idx = 0;
+    size_t uint4StreamSize = streamSize / 2; // the packed input is expected to hold this many bytes
+    while (idx < streamSize) {
+      for (size_t lidx = 0; lidx < numOfAlignedSubvectors; lidx++) {
+	for (size_t bidx = 0; bidx < NGTQ_SIMD_BLOCK_SIZE; bidx++) {
+	  if (idx / 2 >= uint4StreamSize) { // >= (not >): index uint4StreamSize is already one past the end of the input
+	    std::stringstream msg;
+	    msg << "Quantizer::uncompressFromUint4: Fatal inner error! " << (idx / 2) << ":" << uint4StreamSize;
+	    NGTThrowException(msg);
+	  }
+	  if (idx % 2 == 0) { // even position <- low nibble
+	    stream[idx] = uint4Objects[idx / 2] & 0x0f;
+	  } else { // odd position <- high nibble
+	    stream[idx] = uint4Objects[idx / 2] >> 4;
+	  }
+	  idx++;
+	}
+      }
+    }
+  }
+
+#ifdef NGTQ_QBG
+  void restoreToInvertedIndex(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects) { // rebuild the local IDs of every object from the block-arranged stream
+#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
+    std::cerr << "Not implemented." << std::endl;
+    abort();
+#else
+    invertedIndexObjects.resize(numOfAlignedObjects); // grow to the padded count so the padded entries can be written too
+    for (size_t oidx = 0; oidx < numOfAlignedObjects; oidx++) {
+      for (size_t lidx = 0; lidx < numOfAlignedSubvectors; lidx++) { // NOTE(review): iterates the padded subvector count - confirm each entry has room for that many localIDs
+	invertedIndexObjects[oidx].localID[lidx] = getQuantizedObject(oidx, lidx) + 1; // +1 mirrors the -1 applied in arrange()
+      }
+    }
+    invertedIndexObjects.resize(numOfObjects); // drop the padding entries again
+#endif
+  }
+#endif
+
+  uint8_t* getStream() { // hand the buffer to the caller; the destructor then has nothing left to delete
+    uint8_t *released = stream;
+    stream = 0;
+    return released;
+  }
+
+  size_t getUint4StreamSize(size_t nOfObjects) {
+    // Two 4-bit codes share one byte, so the packed size is half of the byte stream size.
+    const size_t byteStreamSize = getStreamSize(nOfObjects);
+    return byteStreamSize / 2;
+  }
+
+  size_t getStreamSize(size_t nOfObjects) { // stream size in bytes for nOfObjects objects; also updates the stored counts
+    numOfObjects = nOfObjects; // remembered as the current object count
+    setStreamSize(); // refreshes numOfAlignedObjects and streamSize
+    return streamSize;
+  }
+
+  uint8_t	*stream;
+  size_t	numOfAlignedSubvectors;
+  size_t	alignedBlockSize;
+  size_t	numOfAlignedObjects;
+  size_t	numOfObjects;
+  size_t	streamSize;
+};
+
+/////
+class Quantizer;
+
+template<typename TYPE> // TYPE: element type of one object component; only sizeof(TYPE) is used in the code visible here
+class ObjectProcessingStream {
+ public:
+  ObjectProcessingStream(size_t dimension, size_t nOfObjects, NGTQ::Quantizer &q): quantizer(q) {
+    initialize(dimension);
+    numOfObjects = nOfObjects;
+    setStreamSize();
+    stream = new uint8_t[streamSize](); // zero-initialized buffer, released in the destructor unless getStream() took it
+  }
+
+  ObjectProcessingStream(size_t dim): quantizer(*reinterpret_cast<NGTQ::Quantizer*>(0)) { // NOTE(review): binds the reference to a null object (undefined behavior) - quantizer must never be used on this path
+    initialize(dim);
+    stream = 0; // no buffer; numOfObjects and streamSize stay unset until getStreamSize() is called
+  }
+
+  ~ObjectProcessingStream() { // NOTE(review): copy construction/assignment are not disabled, so a copy would delete the same buffer twice
+    delete[] stream;
+  }
+
+  void initialize(size_t dim) {
+    dimension = dim;
+  }
+
+  void setStreamSize() { // recompute streamSize from dimension and numOfObjects
+    streamSize = sizeof(TYPE) * dimension * numOfObjects; // objects are stored back to back, without padding
+    return;
+  }
+
+  void arrangeObject(size_t dataNo, void *object) { // copy one object (dimension elements of TYPE) into slot dataNo
+    if (dataNo >= numOfObjects) {
+      std::stringstream msg;
+      msg << "The data index is out of the range. " << dataNo << ":" << numOfObjects;
+      NGTThrowException(msg);
+    }
+#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
+    abort();
+#else
+    memcpy(stream + dataNo * dimension * sizeof(TYPE), object, dimension * sizeof(TYPE)); // object must point at least dimension * sizeof(TYPE) readable bytes
+#endif
+  }
+
+  void arrange(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects); // declared only; the definition is not part of this hunk
+
+  uint8_t* getStream() { // hand the buffer to the caller; the destructor then has nothing left to delete
+    auto s = stream;
+    stream = 0;
+    return s;
+  }
+
+  size_t getStreamSize(size_t nOfObjects) { // stream size in bytes for nOfObjects objects; also updates numOfObjects
+    numOfObjects = nOfObjects;
+    setStreamSize();
+    return streamSize;
+  }
+
+  uint8_t	*stream; // owned byte buffer, or 0 when none is held
+  Quantizer	&quantizer;
+  size_t	dimension; // number of TYPE elements per object
+  size_t	numOfObjects ;
+  size_t	streamSize; // buffer size in bytes
+};
+
+class ScalarQuantizedInt8ObjectProcessingStream {	// flat buffer of numOfObjects objects, one byte per element
+ public:
+  ScalarQuantizedInt8ObjectProcessingStream(NGTQ::Quantizer &q): stream(0), quantizer(q), dimension(0), numOfObjects(0), streamSize(0), dataTypeInfo(0) {}	// every member starts defined so the destructor is safe even if a derived constructor throws
+
+  ScalarQuantizedInt8ObjectProcessingStream(size_t dimension, size_t nOfObjects,
+					    const std::type_info *tinfo, NGTQ::Quantizer &q): quantizer(q) {
+    initialize(dimension, nOfObjects, tinfo);
+    stream = new uint8_t[streamSize]();	// trailing () value-initializes, i.e. zero-fills
+  }
+
+  ScalarQuantizedInt8ObjectProcessingStream(size_t dim): quantizer(*reinterpret_cast<NGTQ::Quantizer*>(0)) {	// buffer-less; `quantizer` is a null reference and must not be used
+    initialize(dim, 0);
+    stream = 0;
+  }
+
+  virtual ~ScalarQuantizedInt8ObjectProcessingStream() {	// virtual because arrangeObject() is virtual and the class is derived from
+    delete[] stream;
+  }
+
+  void initialize(size_t dim, size_t nOfObjects, const std::type_info *typeInfo = 0) {	// sets the sizes, the stream size and the source element type
+    dimension = dim;
+    numOfObjects = nOfObjects;
+    setStreamSize();
+    dataTypeInfo = typeInfo;
+  }
+
+  virtual void arrangeObject(size_t dataNo, std::vector<float> &object, float scale, float offset, bool shift) {	// quantizes `object` in place, then stores it as bytes in slot dataNo
+    if (dataNo >= numOfObjects) {
+      std::stringstream msg;
+      msg << "The data index is out of the range. " << dataNo << ":" << numOfObjects;
+      NGTThrowException(msg);
+    }
+    NGT::ObjectSpace::quantizeToQint8(object, *dataTypeInfo, dimension, offset, scale, shift);	// requires dataTypeInfo to be non-null
+    for (size_t i = 0; i < object.size(); i++) {
+      *(stream + dataNo * dimension * sizeof(uint8_t) + i) = static_cast<uint8_t>(object[i]);
+    }
+  }
+
+  void arrange(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects);	// defined outside this class body
+
+  uint8_t* getStream() {	// detaches the buffer: the caller receives the pointer and the member becomes null
+    auto s = stream;
+    stream = 0;
+    return s;
+  }
+
+  void setStreamSize() {	// streamSize = one byte per element for numOfObjects objects
+    streamSize = sizeof(uint8_t) * dimension * numOfObjects;
+    return;
+  }
+
+  size_t getStreamSize(size_t nOfObjects) {	// side effect: overwrites numOfObjects and streamSize
+    numOfObjects = nOfObjects;
+    setStreamSize();
+    return streamSize;
+  }
+
+  uint8_t	*stream;	// freed with delete[] in the destructor
+  Quantizer	&quantizer;
+  size_t	dimension;
+  size_t	numOfObjects ;
+  size_t	streamSize;
+  const std::type_info* dataTypeInfo;	// element type handed to quantizeToQint8(); 0 when not provided
+};
+
+class ScalarQuantizedUint8TransposedObjectProcessingStream : public ScalarQuantizedInt8ObjectProcessingStream {	// stores quantized bytes grouped by block index first, then by object (see arrangeObject)
+ public:
+  ScalarQuantizedUint8TransposedObjectProcessingStream(size_t dimension, size_t nOfObjects, NGTQ::Quantizer &q):
+   ScalarQuantizedInt8ObjectProcessingStream(q) {
+    initialize(dimension, nOfObjects);	// this class's two-argument initialize(), which hides the base one
+    stream = new uint8_t[streamSize]();	// trailing () value-initializes, i.e. zero-fills
+  }
+
+  ScalarQuantizedUint8TransposedObjectProcessingStream(size_t dim):
+   ScalarQuantizedInt8ObjectProcessingStream(*reinterpret_cast<NGTQ::Quantizer*>(0)) {	// null reference: `quantizer` must not be used on objects built this way
+    initialize(dim, 0);
+    stream = 0;
+  }
+
+  void initialize(size_t dim, size_t nOfObjects) {	// sets sizes and block geometry; does not assign dataTypeInfo
+    dimension = dim;
+    numOfObjects = nOfObjects;
+    smallBlockSize = 16;
+    blockSize = 64;
+    lineSize = blockSize * nOfObjects;	// bytes occupied by one blockSize-wide group across all objects
+    setStreamSize();
+  }
+
+  void arrangeObject(size_t dataNo, std::vector<float> &object, float scale, float offset, bool shift) {	// quantizes `object` in place and scatters its bytes into the block layout
+    if (dataNo >= numOfObjects) {
+      std::stringstream msg;
+      msg << "The data index is out of the range. " << dataNo << ":" << numOfObjects;
+      NGTThrowException(msg);
+    }
+    NGT::ObjectSpace::quantizeToQint8(object, *dataTypeInfo, dimension, offset, scale, shift);	// NOTE(review): neither this class's constructors nor its initialize() assign dataTypeInfo; confirm it is set before this call
+    size_t endOfBlock = ((object.size() - 1) / blockSize) * blockSize;	// largest multiple of blockSize below object.size(); wraps around if object is empty
+    for (size_t i = 0; i < endOfBlock; i++) {	// elements before endOfBlock go into blockSize-wide groups
+      size_t idx = lineSize * (i / blockSize) + blockSize * dataNo + i % blockSize;
+      if (idx >= streamSize) {
+	std::stringstream msg;
+	msg << "Fatal inner error! (1) " << idx << ":" << streamSize << " " 
+	    << dataNo << ":" << i << "/" << object.size();
+	NGTThrowException(msg);
+      }
+      stream[idx] = static_cast<uint8_t>(object[i]);
+    }
+    for (size_t i = endOfBlock; i < object.size(); i++) {	// remaining elements go into smallBlockSize-wide groups
+      size_t idx = lineSize * (i / blockSize) + (i - endOfBlock) / smallBlockSize * (smallBlockSize * numOfObjects) + smallBlockSize * dataNo + (i - endOfBlock) % smallBlockSize;
+      if (idx >= streamSize) {
+	std::stringstream msg;
+	msg << "Fatal inner error! (2) " << idx << ":" << streamSize << " " 
+	    << dataNo << ":" << i << "/" << object.size();
+	NGTThrowException(msg);
+      }
+      stream[idx] = static_cast<uint8_t>(object[i]);
+    }
+  }
+
+  void setStreamSize() {	// dimension rounded up to a multiple of smallBlockSize, times numOfObjects
+    streamSize = ((dimension - 1) / smallBlockSize + 1) * smallBlockSize * numOfObjects;
+    return;
+  }
+
+  size_t getStreamSize(size_t nOfObjects) {	// side effect: overwrites numOfObjects and streamSize (lineSize is not recomputed)
+    numOfObjects = nOfObjects;
+    setStreamSize();
+    return streamSize;
+  }
+  size_t smallBlockSize;	// set to 16 by initialize()
+  size_t blockSize;	// set to 64 by initialize()
+  size_t lineSize;	// blockSize * number of objects given to initialize()
+};
+///// 
+
+class QuantizedObjectDistance {
+public:
+  class DistanceLookupTable {	// heap array of per-index lookup values, plus (non-QBG builds) a per-index validity flag
+  public:
+    DistanceLookupTable():localDistanceLookup(0) {}	// no storage until initialize() is called
+    ~DistanceLookupTable() {
+      if (localDistanceLookup != 0) {
+	delete[] localDistanceLookup;
+	localDistanceLookup = 0;
+      }
+    }
+    bool isValid(size_t idx) {	// non-QBG builds: returns flag[idx]; QBG builds: prints a message and aborts
+#ifdef NGTQ_QBG
+      std::cerr << "isValid() is not implemented" << std::endl;
+      abort();
+#else
+      return flag[idx];
+#endif
+    }
+#ifndef NGTQ_DISTANCE_ANGLE
+    void set(size_t idx, double d) {	// stores d at idx (narrowed to float) and, in non-QBG builds, marks the entry valid
+#ifndef NGTQ_QBG
+      flag[idx] = true;
+#endif
+      localDistanceLookup[idx] = d;
+    }
+    double getDistance(size_t idx) { return localDistanceLookup[idx]; }	// no bounds check
+#endif
+    void initialize(size_t s) {	// allocates s entries; NOTE(review): a second call would overwrite the pointer without freeing the first array
+      size = s;
+#ifdef NGTQ_DISTANCE_ANGLE
+      localDistanceLookup = new LocalDistanceLookup[size];
+#else
+      localDistanceLookup = new float[size];	// entries are not zeroed here
+#endif
+#ifndef NGTQ_QBG
+      flag.resize(size, false);
+#endif
+    }
+
+#ifdef NGTQ_DISTANCE_ANGLE
+    LocalDistanceLookup	*localDistanceLookup;
+#else
+    float		*localDistanceLookup;
+#endif
+    size_t		size;	// entry count given to initialize(); not set by the constructor
+#ifndef NGTQ_QBG
+    vector<bool>	flag;	// flag[i] is set to true by set(i, ...)
+#endif
+  };
+
+  class DistanceLookupTableUint8 {	// byte lookup table with per-subvector scale and offset arrays
+  public:
+    DistanceLookupTableUint8():localDistanceLookup(0) {}	// scales and offsets stay uninitialized until initialize()
+    ~DistanceLookupTableUint8() {
+      if (localDistanceLookup != 0) {	// all three arrays are allocated together in initialize(), so one check guards them all
+	delete[] localDistanceLookup;
+	localDistanceLookup = 0;
+	delete[] scales;
+	delete[] offsets;
+      }
+    }
+    void initialize(size_t numOfSubspaces, size_t localCodebookCentroidNo) {	// allocates the table and derives range512/range256; the parameter shadows the member of the same name, which is not assigned here
+      size_t numOfAlignedSubvectors = ((numOfSubspaces - 1) / NGTQ_BATCH_SIZE + 1) * NGTQ_BATCH_SIZE;	// numOfSubspaces rounded up to a multiple of NGTQ_BATCH_SIZE
+      size = numOfAlignedSubvectors * localCodebookCentroidNo;
+      localDistanceLookup = new uint8_t[size];
+      scales = new float[numOfAlignedSubvectors];
+      offsets = new float[numOfAlignedSubvectors];
+      range512 = (numOfSubspaces >> 2) * step512;	// step512 bytes per complete group of 4 subspaces
+      range256 = (((numOfSubspaces - 1) >> 1) + 1) * step256;	// step256 bytes per pair of subspaces, pair count rounded up
+    }
+
+    uint8_t		*localDistanceLookup;
+    size_t		size;	// number of bytes allocated for localDistanceLookup
+    size_t		aslignedNumOfSubspaces;	// not assigned anywhere in this class
+    size_t		localCodebookCentroidNo;	// not assigned anywhere in this class
+    float		*scales;
+    float		*offsets;
+    float		totalOffset;	// not assigned anywhere in this class
+    size_t		range512;
+    size_t		range256;
+    static constexpr size_t		step512 = 32;
+    static constexpr size_t		step256 = 16;
+  };
+#ifdef NGT_IVI
+  QuantizedObjectDistance(Quantizer &q): localCentroids(0), localCentroidsForSIMD(0), quantizer(q) {}	// both arrays start null because the destructor delete[]s them unconditionally
+  QuantizedObjectDistance(): localCentroids(0), localCentroidsForSIMD(0), quantizer(*reinterpret_cast<Quantizer*>(0)){}	// null reference: `quantizer` must not be used on objects built this way
+#else
+  QuantizedObjectDistance(): localCentroids(0), localCentroidsForSIMD(0) {}	// both arrays start null because the destructor delete[]s them unconditionally
+#endif
+
+  virtual ~QuantizedObjectDistance() {	// releases both centroid arrays; both pointers must be null or come from new[]
+    delete[] localCentroids;
+    delete[] localCentroidsForSIMD;
+  }
+
+  virtual double operator()(NGT::Object &object, size_t objectID, void *localID) = 0;
+
+  virtual double operator()(void *localID, DistanceLookupTable &distanceLUT) = 0;
+
+#ifdef NGTQBG_MIN
+  virtual float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query = 0) = 0;
+#else
+  virtual void operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query = 0) = 0;
+#endif
+#ifdef NGTQBG_MIN
+  virtual float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector<uint32_t> &queryList) = 0;
+#else
+  virtual void operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector<uint32_t> &queryList) = 0;
+#endif
+  virtual double operator()(NGT::Object &object, size_t objectID, void *localID, DistanceLookupTable &distanceLUT) = 0;
+
+  template <typename T>
+  inline double getAngleDistanceUint8(NGT::Object &object, size_t objectID, T localID[]) {	// angle (radians) between `object` and the vector (global centroid objectID + local centroids localID[])
+    assert(globalCodebookIndex != 0);
+    NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID);
+    size_t sizeOfObject = globalCodebookIndex->getObjectSpace().getByteSizeOfObject();
+    size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(uint8_t);	// elements per subvector, one byte per element
+#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
+    unsigned char *gcptr = &gcentroid.at(0, globalCodebookIndex->getObjectSpace().getRepository().allocator);
+#else
+    unsigned char *gcptr = &gcentroid[0];
+#endif
+    unsigned char *optr = &((NGT::Object&)object)[0];
+    double normA = 0.0F;
+    double normB = 0.0F;
+    double sum = 0.0F;
+    for (size_t li = 0; li < localDivisionNo; li++) {
+      size_t idx = localCodebookNo == 1 ? 0 : li;	// a single local codebook is shared by every subvector
+      NGT::PersistentObject &lcentroid = *localCodebookIndexes[idx].getObjectSpace().getRepository().get(localID[li]);
+#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
+      float *lcptr = (float*)&lcentroid.at(0, localCodebookIndexes[idx].getObjectSpace().getRepository().allocator);
+#else
+      float *lcptr = (float*)&lcentroid[0];
+#endif
+      float *lcendptr = lcptr + localDataSize;
+      while (lcptr != lcendptr) {	// optr and gcptr keep advancing across subvectors; lcptr restarts for each one
+	double a = *optr++;
+	double b = *gcptr++ + *lcptr++;
+	normA += a * a;
+	normB += b * b;
+	sum += a * b;
+      }
+    }
+    double cosine = sum / (sqrt(normA) * sqrt(normB));	// a zero norm makes this non-finite
+    if (cosine >= 1.0F) {	// clamp so acos() is not called outside [-1, 1]
+      return 0.0F;
+    } else if (cosine <= -1.0F) {
+      return acos(-1.0F);
+    }
+    return acos(cosine);
+  }
+
+#if defined(NGT_NO_AVX)
+  template <typename T>
+  inline double getL2DistanceUint8(NGT::Object &object, size_t objectID, T localID[]) {	// scalar build: L2 distance between `object` and (global centroid objectID + local centroids localID[])
+    assert(globalCodebookIndex != 0);
+    NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID);
+    size_t sizeOfObject = globalCodebookIndex->getObjectSpace().getByteSizeOfObject();
+    size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(uint8_t);	// elements per subvector, one byte per element
+#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
+    unsigned char *gcptr = &gcentroid.at(0, globalCodebookIndex->getObjectSpace().getRepository().allocator);
+#else
+    unsigned char *gcptr = &gcentroid[0];
+#endif
+    unsigned char *optr = &((NGT::Object&)object)[0];
+    double distance = 0.0;
+    for (size_t li = 0; li < localDivisionNo; li++) {
+      size_t idx = localCodebookNo == 1 ? 0 : li;	// a single local codebook is shared by every subvector
+      NGT::PersistentObject &lcentroid = *localCodebookIndexes[idx].getObjectSpace().getRepository().get(localID[li]);
+#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
+      float *lcptr = (float*)&lcentroid.at(0, localCodebookIndexes[idx].getObjectSpace().getRepository().allocator);
+#else
+      float *lcptr = (float*)&lcentroid[0];
+#endif
+      double d = 0.0;
+      float *lcendptr = lcptr + localDataSize;
+      while (lcptr != lcendptr) {	// optr and gcptr keep advancing across subvectors; lcptr restarts for each one
+	double sub = ((int)*optr++ - (int)*gcptr++) - *lcptr++;	// residual (object - global centroid) minus the local centroid
+	d += sub * sub;
+      }
+      distance += d;
+    }
+    return sqrt(distance);
+  }
+#else
+  template <typename T>
+  inline double getL2DistanceUint8(NGT::Object &object, size_t objectID, T localID[]) {	// SIMD build: L2 distance between `object` and (global centroid objectID + local centroids localID[])
+    assert(globalCodebookIndex != 0);
+    NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID);
+    size_t sizeOfObject = globalCodebookIndex->getObjectSpace().getByteSizeOfObject();
+    size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(uint8_t);	// elements per subvector, one byte per element
+#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
+    unsigned char *gcptr = &gcentroid.at(0, globalCodebookIndex->getObjectSpace().getRepository().allocator);
+#else
+    unsigned char *gcptr = &gcentroid[0];
+#endif
+    unsigned char *optr = &((NGT::Object&)object)[0];
+    double distance = 0.0;
+    for (size_t li = 0; li < localDivisionNo; li++) {
+      size_t idx = localCodebookNo == 1 ? 0 : li;	// a single local codebook is shared by every subvector
+      NGT::PersistentObject &lcentroid = *localCodebookIndexes[idx].getObjectSpace().getRepository().get(localID[li]);
+#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
+      float *lcptr = (float*)&lcentroid.at(0, localCodebookIndexes[idx].getObjectSpace().getRepository().allocator);
+#else
+      float *lcptr = (float*)&lcentroid[0];
+#endif
+
+      float *lcendptr = lcptr + localDataSize - 3;	// SIMD bound: loop only while a full group of 4 floats remains
+      __m128 sum = _mm_setzero_ps();
+      while (lcptr < lcendptr) {
+	__m128i x1 = _mm_cvtepu8_epi32(_mm_loadu_si128((__m128i const*)optr));	// NOTE(review): loads 16 bytes although only 4 are converted; confirm the buffers are padded
+	__m128i x2 = _mm_cvtepu8_epi32(_mm_loadu_si128((__m128i const*)gcptr));
+	x1 = _mm_sub_epi32(x1, x2);
+	__m128 sub = _mm_sub_ps(_mm_cvtepi32_ps(x1), _mm_loadu_ps(lcptr));
+	sum = _mm_add_ps(sum, _mm_mul_ps(sub, sub));
+	optr += 4;
+	gcptr += 4;
+	lcptr += 4;
+      }
+      __attribute__((aligned(32))) float f[4];
+      _mm_store_ps(f, sum);
+      double d = f[0] + f[1] + f[2] + f[3];
+      while (lcptr < lcendptr + 3) {	// scalar tail up to the true end of the subvector; also keeps optr/gcptr in step for the next one
+	double sub = ((int)*optr++ - (int)*gcptr++) - *lcptr++;
+	d += sub * sub;
+      }
+      distance += d;
+    }
+    distance = sqrt(distance);
+    return distance;
+  }
+#endif
+
+  template <typename T>
   inline double getAngleDistanceFloat(NGT::Object &object, size_t objectID, T localID[]) {
     assert(globalCodebookIndex != 0);
     NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID);
@@ -1235,8 +1678,8 @@ class QuantizedObjectDistance {
       dlu++;
       for (size_t k = 1; k < localCodebookCentroidNo; k++) {
 	NGT::Object &lcentroid = (NGT::Object&)*localCodebookIndexes[li].getObjectSpace().getRepository().get(k);
-	float *lcptr = (float*)&lcentroid[0];
-	float *lcendptr = lcptr + localDataSize;
+	float *lcptr = (float*)&lcentroid[0];		
+	float *lcendptr = lcptr + localDataSize;	
 	float *toptr = optr + oft;
 	float *tgcptr = gcptr + oft;
 	double normA = 0.0F;
@@ -1255,11 +1698,13 @@ class QuantizedObjectDistance {
     }
   }
 #else
-  inline void createDistanceLookup(NGT::Object &object, size_t objectID, DistanceLookupTable &distanceLUT) {
+
+  virtual void createDistanceLookup(NGT::Object &object, size_t objectID, DistanceLookupTable &distanceLUT) {
     void *objectPtr = &((NGT::Object&)object)[0];
     createDistanceLookup(objectPtr, objectID, distanceLUT);
   }
-  inline void createDistanceLookup(void *objectPtr, size_t objectID, DistanceLookupTable &distanceLUT) {
+
+  virtual void createDistanceLookup(void *objectPtr, size_t objectID, DistanceLookupTable &distanceLUT) {
     assert(globalCodebookIndex != 0);
 #ifdef NGTQ_QBG
     void *globalCentroid = quantizationCodebook->data(objectID);
@@ -1288,7 +1733,7 @@ class QuantizedObjectDistance {
       lut++;
       lcptr += localDataSize;
       for (size_t k = 1; k < localCodebookCentroidNo; k++) {
-	float *lcendptr = lcptr + localDataSize;
+	float *lcendptr = lcptr + localDataSize;	
 	float *toptr = optr + oft;
 	float *tgcptr = gcptr + oft;
 	float d = 0.0;
@@ -1563,7 +2008,7 @@ class QuantizedObjectDistance {
       *lut++ = 0;
       lcptr += localDataSize;
       for (size_t k = 1; k < localCodebookCentroidNo; k++) {
-	float *lcendptr = lcptr + localDataSize;
+	float *lcendptr = lcptr + localDataSize;	
 	float *toptr = optr + oft;
 #if !defined(NGTQG_ZERO_GLOBAL)
 	float *tgcptr = gcptr + oft;
@@ -1584,11 +2029,11 @@ class QuantizedObjectDistance {
 
   }
 
-  inline void createDistanceLookup(NGT::Object &object, size_t objectID, DistanceLookupTableUint8 &distanceLUT) {
+  virtual void createDistanceLookup(NGT::Object &object, size_t objectID, DistanceLookupTableUint8 &distanceLUT) {
     void *objectPtr = &((NGT::Object&)object)[0];
     createDistanceLookup(objectPtr, objectID, distanceLUT);
   }
-  inline void createDistanceLookup(void *objectPtr, size_t objectID, DistanceLookupTableUint8 &distanceLUT) {
+  virtual void createDistanceLookup(void *objectPtr, size_t objectID, DistanceLookupTableUint8 &distanceLUT) {
     assert(globalCodebookIndex != 0);
     size_t sizeOfObject = dimension * sizeOfType;
 #ifdef NGTQG_DOT_PRODUCT
@@ -1674,6 +2119,12 @@ class QuantizedObjectDistance {
     c.initialize(localCodebookNo, localCodebookCentroidNo);
   }
 
+  virtual uint8_t *generateRearrangedObjects(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects) = 0;
+  virtual void restoreIntoInvertedIndex(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects,
+					size_t numOfSubspaces, std::vector<uint32_t> &ids, void *objects) = 0;
+  virtual size_t getNumOfAlignedObjects(size_t noOfObjects) = 0;
+  virtual size_t getSizeOfCluster(size_t noOfObjects) = 0;
+
   NGT::Index	*globalCodebookIndex;
   NGT::Index	*localCodebookIndexes;
   size_t	localDivisionNo;
@@ -1686,12 +2137,16 @@ class QuantizedObjectDistance {
   vector<float>	globalCentroid;
   QuantizationCodebook<float>	*quantizationCodebook;
   
-  float		*localCentroids;
-  float		*localCentroidsForSIMD;
+  float		*localCentroids;	
+  float		*localCentroidsForSIMD;	
 
   size_t	localCodebookCentroidNoSIMD;
 
   Rotation	*rotation;
+
+#ifdef NGT_IVI
+  Quantizer	&quantizer;
+#endif
 };
 
 template <typename T>
@@ -1733,7 +2188,7 @@ class QuantizedObjectDistanceUint8 : public QuantizedObjectDistance {
     for (size_t li = 0; li < localDivisionNo; li++) {
       distance += distanceLUT.getDistance(li * localCodebookCentroidNo + localID[li]);
     }
-    return sqrt(distance);
+    return sqrt(distance);	
   }
 
   inline double operator()(NGT::Object &object, size_t objectID, void *l) {
@@ -1774,16 +2229,48 @@ class QuantizedObjectDistanceUint8 : public QuantizedObjectDistance {
 	distanceLUT.set(li * localCodebookCentroidNo + localID[li], d);
       }
     }
-    return sqrt(distance);
+    return sqrt(distance);	
+  }
+#ifdef NGTQBG_MIN
+  inline float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query = 0) {
+#else
+  inline void operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query = 0) {
+#endif
+    cerr << "operator is not implemented" << endl;
+    abort();
   }
+
 #ifdef NGTQBG_MIN
-  inline float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT) {
+  inline float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector<uint32_t> &queryList) {
 #else
-  inline void operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT) {
+    inline void operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector<uint32_t> &queryList) {
 #endif
     cerr << "operator is not implemented" << endl;
     abort();
   }
+
+  uint8_t *generateRearrangedObjects(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects) {	// not supported by this distance class
+    NGTThrowException("Not implemented");	// NOTE(review): presumably always throws, which would make everything below unreachable — confirm
+#ifdef NGTQ_QBG
+    QuantizedObjectProcessingStream quantizedStream(invertedIndexObjects.numOfSubvectors, invertedIndexObjects.size());
+    quantizedStream.arrange(invertedIndexObjects);
+    return quantizedStream.compressIntoUint4();
+#else
+    return 0;
+#endif
+  }
+  void restoreIntoInvertedIndex(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects,	// not supported by this distance class
+				size_t numOfSubspaces, std::vector<uint32_t> &ids, void *objects) {
+    NGTThrowException("Not implemented");	// all arguments are ignored
+  }
+  size_t getNumOfAlignedObjects(size_t noOfObjects) {	// not supported by this distance class: calling it terminates the process
+    abort();
+    return 0;	// never reached; present so the function has a return statement
+  }
+  size_t getSizeOfCluster(size_t noOfObjects) {	// not supported by this distance class: calling it terminates the process
+    abort();
+    return 0;	// never reached; present so the function has a return statement
+  }
 #endif
 
 };
@@ -1831,7 +2318,7 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance {
       localID++;
       lut += localCodebookCentroidNo;
     }
-    return sqrt(distance);
+    return sqrt(distance);	
   }
 
 
@@ -1849,24 +2336,158 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance {
       b = _mm256_max_ps(b, mask);
       data = _mm256_min_ps(data, b);
     }
-
-    data = _mm256_min_ps(data, (__m256)_mm256_permute4x64_epi64((__m256i)data, _MM_SHUFFLE(3, 2, 3, 2)));
-    data = _mm256_min_ps(data, (__m256)_mm256_srli_si256((__m256i)data, 8));
-    data = _mm256_min_ps(data, (__m256)_mm256_srli_si256((__m256i)data, 4));
-
-    return data[0];
+
+    data = _mm256_min_ps(data, (__m256)_mm256_permute4x64_epi64((__m256i)data, _MM_SHUFFLE(3, 2, 3, 2)));
+    data = _mm256_min_ps(data, (__m256)_mm256_srli_si256((__m256i)data, 8));
+    data = _mm256_min_ps(data, (__m256)_mm256_srli_si256((__m256i)data, 4));
+
+    return data[0];
+  }
+#endif
+  
+#if defined(NGTQG_AVX512) || defined(NGTQG_AVX2)
+#if defined(NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION)
+#ifdef NGTQBG_MIN
+  inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query = 0) {	// fills `distances` and returns the smallest value found; `query` is not read
+#else
+  inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query = 0) {	// fills `distances` from the packed codes in `inv`; `query` is not read
+#endif
+
+
+    uint8_t *localID = static_cast<uint8_t*>(inv);	// packed codes: each byte carries two 4-bit lookup indices (see the 0x0f / 0xf0 masks)
+    float *d = distances;
+#ifdef NGTQBG_MIN
+    float *lastd = distances + noOfObjects;
+    float min = std::numeric_limits<float>::max();
+#endif
+#if defined(NGTQG_AVX512)
+    const __m512i mask512x0F = _mm512_set1_epi16(0x000f);
+    const __m512i mask512xF0 = _mm512_set1_epi16(0x00f0);
+    const size_t range512 = distanceLUT.range512;
+    auto step512 = distanceLUT.step512;
+#endif
+    const __m256i mask256x0F = _mm256_set1_epi16(0x000f);
+    const __m256i mask256xF0 = _mm256_set1_epi16(0x00f0);
+    const size_t range256 = distanceLUT.range256;
+    auto step256 = distanceLUT.step256;
+    auto *last = localID + range256 / NGTQ_SIMD_BLOCK_SIZE * noOfObjects;
+    while (localID < last) {	// each pass consumes range256 bytes of codes and writes 16 floats at d
+      uint8_t *lut = distanceLUT.localDistanceLookup;
+      auto *lastgroup256 = localID + range256;
+#if defined(NGTQG_AVX512)
+      __m512i depu16 = _mm512_setzero_si512();
+      auto *lastgroup512 = localID + range512;
+      while (localID < lastgroup512) {
+	__m512i lookupTable = _mm512_loadu_si512((__m512i const*)lut);
+	_mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0);
+	__m512i packedobj = _mm512_cvtepu8_epi16(_mm256_loadu_si256((__m256i const*)&localID[0]));
+	__m512i lo = _mm512_and_si512(packedobj, mask512x0F);
+	__m512i hi = _mm512_slli_epi16(_mm512_and_si512(packedobj, mask512xF0), 4);	// moves the high nibble into the low nibble of the upper byte of each 16-bit lane
+	__m512i obj = _mm512_or_si512(lo, hi);
+	__m512i vtmp = _mm512_shuffle_epi8(lookupTable, obj);	// byte-wise table lookup using the 4-bit indices
+        depu16 = _mm512_adds_epu16(depu16, _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(vtmp, 0)));
+	depu16 = _mm512_adds_epu16(depu16, _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(vtmp, 1)));
+	lut += (localCodebookCentroidNo - 1) * 4;
+	localID += step512;
+      }
+#else
+      __m256i depu16l = _mm256_setzero_si256();
+      __m256i depu16h = _mm256_setzero_si256();
+#endif
+      while (localID < lastgroup256) {	// with AVX512 this handles what the 512-bit loop left over; otherwise it handles everything
+	__m256i lookupTable = _mm256_loadu_si256((__m256i const*)lut);
+	_mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0);
+	__m256i packedobj = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)&localID[0]));
+	__m256i lo = _mm256_and_si256(packedobj, mask256x0F);
+	__m256i hi = _mm256_slli_epi16(_mm256_and_si256(packedobj, mask256xF0), 4);
+	__m256i obj = _mm256_or_si256(lo, hi);
+	__m256i vtmp = _mm256_shuffle_epi8(lookupTable, obj);
+
+#if defined(NGTQG_AVX512)
+        depu16 = _mm512_adds_epu16(depu16, _mm512_cvtepu8_epi16(vtmp));
+#else
+	depu16l = _mm256_adds_epu16(depu16l, _mm256_cvtepu8_epi16(_mm256_extractf128_si256(vtmp, 0)));
+	depu16h = _mm256_adds_epu16(depu16h, _mm256_cvtepu8_epi16(_mm256_extractf128_si256(vtmp, 1)));
+#endif
+	lut += (localCodebookCentroidNo - 1) * 2;
+	localID += step256;
+      }
+#if defined(NGTQG_AVX512)
+      __m512i lo = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 0));
+      __m512i hi = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 1));
+
+      __m512 distance = _mm512_cvtepi32_ps(_mm512_add_epi32(lo, hi));
+      __m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0]));	// only scales[0] is applied, to all lanes
+      distance = _mm512_mul_ps(distance, scale);
+      distance = _mm512_add_ps(distance, _mm512_set1_ps(distanceLUT.totalOffset));
+#if defined(NGTQG_DOT_PRODUCT)
+      float one = 1.0;
+      float two = 2.0;
+      distance = _mm512_mul_ps(_mm512_sub_ps(_mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distance), _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&two)));
+#endif
+      distance = _mm512_sqrt_ps(distance);
+      _mm512_storeu_ps(d, distance);	// always stores 16 floats, even when fewer objects remain
+#ifdef NGTQBG_MIN
+      {
+	float tmpmin;
+	int rest = 16 - (lastd - d);	// lanes beyond the last requested object
+	if (rest > 0) {
+	  __mmask16 mask = 0xffff;
+	  mask >>= rest;
+	  tmpmin = _mm512_mask_reduce_min_ps(mask, distance);
+	} else {
+	  tmpmin = _mm512_reduce_min_ps(distance);
+	}
+	//std::cerr << "tmpmin=" << tmpmin << std::endl;
+	if (min > tmpmin) min = tmpmin;
+      }
+#endif
+#else
+      __m256i lol = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16l, 0));
+      __m256i loh = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16l, 1));
+      __m256i hil = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16h, 0));
+      __m256i hih = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16h, 1));
+      __m256 distancel = _mm256_cvtepi32_ps(_mm256_add_epi32(lol, hil));
+      __m256 distanceh = _mm256_cvtepi32_ps(_mm256_add_epi32(loh, hih));
+      __m256 scalel = _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0]));	// both halves use scales[0]
+      __m256 scaleh = _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0]));
+      distancel = _mm256_mul_ps(distancel, scalel);
+      distancel = _mm256_add_ps(distancel, _mm256_set1_ps(distanceLUT.totalOffset));
+      distanceh = _mm256_mul_ps(distanceh, scaleh);
+      distanceh = _mm256_add_ps(distanceh, _mm256_set1_ps(distanceLUT.totalOffset));
+#if defined(NGTQG_DOT_PRODUCT)
+      float one = 1.0;
+      float two = 2.0;
+      distancel = _mm256_mul_ps(_mm256_sub_ps(_mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distancel), _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&two)));
+      distanceh = _mm256_mul_ps(_mm256_sub_ps(_mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distanceh), _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&two)));
+#endif
+      distancel = _mm256_sqrt_ps(distancel);
+      distanceh = _mm256_sqrt_ps(distanceh);
+      _mm256_storeu_ps(d, distancel);	// the two stores always write 16 floats, even when fewer objects remain
+      _mm256_storeu_ps(d + 8, distanceh);
+#ifdef NGTQBG_MIN
+      {
+	float tmpmin = horizontalMin(distancel, distanceh, lastd - d);
+	if (min > tmpmin) min = tmpmin;
+      }
+#endif
+#endif
+      d += 16;
+    }
+#ifdef NGTQBG_MIN
+    return min;
+#endif
   }
+#else /// NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION  ////////////////////////////////////////
+#ifndef NGT_AVX512
+#error "AVX512 is *NOT* defined. *INDIVIDUAL* scale offset compression is available only for AVX512!"
 #endif
-  
-#if defined(NGTQG_AVX512) || defined(NGTQG_AVX2)
-#if defined(NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION)
 #ifdef NGTQBG_MIN
-  inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT) {
+  inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query = 0) {
 #else
-  inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT) {
+  inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query = 0) {
 #endif
 
-
     uint8_t *localID = static_cast<uint8_t*>(inv);
     float *d = distances;
 #ifdef NGTQBG_MIN
@@ -1874,8 +2495,8 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance {
     float min = std::numeric_limits<float>::max();
 #endif
 #if defined(NGTQG_AVX512)
-    const __m512i mask512x0F = _mm512_set1_epi16(0x000f);
-    const __m512i mask512xF0 = _mm512_set1_epi16(0x00f0);
+    __m512i mask512x0F = _mm512_set1_epi16(0x000f);
+    __m512i mask512xF0 = _mm512_set1_epi16(0x00f0);
     const size_t range512 = distanceLUT.range512;
     auto step512 = distanceLUT.step512;
 #endif
@@ -1886,9 +2507,11 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance {
     auto *last = localID + range256 / NGTQ_SIMD_BLOCK_SIZE * noOfObjects;
     while (localID < last) {
       uint8_t *lut = distanceLUT.localDistanceLookup;
+      float *scales = distanceLUT.scales;
       auto *lastgroup256 = localID + range256;
+      __m512 distance = _mm512_setzero_ps();
 #if defined(NGTQG_AVX512)
-      __m512i depu16 = _mm512_setzero_si512();
+      //__m512i depu16 = _mm512_setzero_si512();
       auto *lastgroup512 = localID + range512;
       while (localID < lastgroup512) {
 	__m512i lookupTable = _mm512_loadu_si512((__m512i const*)lut);
@@ -1898,9 +2521,22 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance {
 	__m512i hi = _mm512_slli_epi16(_mm512_and_si512(packedobj, mask512xF0), 4);
 	__m512i obj = _mm512_or_si512(lo, hi);
 	__m512i vtmp = _mm512_shuffle_epi8(lookupTable, obj);
-        depu16 = _mm512_adds_epu16(depu16, _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(vtmp, 0)));
-	depu16 = _mm512_adds_epu16(depu16, _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(vtmp, 1)));
+
+	__m512 d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 0)));
+	__m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[0]));
+	distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale));
+	d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 1)));
+	scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[1]));
+	distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale));
+	d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 2)));
+	scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[2]));
+	distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale));
+	d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 3)));
+	scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[3]));
+	distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale));
+
 	lut += (localCodebookCentroidNo - 1) * 4;
+	scales += 4;
 	localID += step512;
       }
 #else
@@ -1910,28 +2546,36 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance {
       while (localID < lastgroup256) {
 	__m256i lookupTable = _mm256_loadu_si256((__m256i const*)lut);
 	_mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0);
+	//std::cerr << "obj=" << (int)(localID[0] & 0x0f) << "," << (int)((localID[0] >> 4) & 0x0f) << std::endl;
 	__m256i packedobj = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)&localID[0]));
 	__m256i lo = _mm256_and_si256(packedobj, mask256x0F);
 	__m256i hi = _mm256_slli_epi16(_mm256_and_si256(packedobj, mask256xF0), 4);
 	__m256i obj = _mm256_or_si256(lo, hi);
+	//std::cerr << "LUT=" << (int)*lut << "," << (int)*(lut+1) << std::endl;
 	__m256i vtmp = _mm256_shuffle_epi8(lookupTable, obj);
 
 #if defined(NGTQG_AVX512)
-        depu16 = _mm512_adds_epu16(depu16, _mm512_cvtepu8_epi16(vtmp));
+	__m512 d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm256_extracti32x4_epi32(vtmp, 0)));
+	__m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[0]));
+	distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale));
+	d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm256_extracti32x4_epi32(vtmp, 1)));
+	scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[1]));
+	distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale));
+	////////////////////
 #else
 	depu16l = _mm256_adds_epu16(depu16l, _mm256_cvtepu8_epi16(_mm256_extractf128_si256(vtmp, 0)));
 	depu16h = _mm256_adds_epu16(depu16h, _mm256_cvtepu8_epi16(_mm256_extractf128_si256(vtmp, 1)));
 #endif
 	lut += (localCodebookCentroidNo - 1) * 2;
+	scales += 2;
 	localID += step256;
       }
-#if defined(NGTQG_AVX512)
-      __m512i lo = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 0));
-      __m512i hi = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 1));
 
-      __m512 distance = _mm512_cvtepi32_ps(_mm512_add_epi32(lo, hi));
-      __m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0]));
-      distance = _mm512_mul_ps(distance, scale);
+#if defined(NGTQG_AVX512)
+      //__m512i lo = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 0));
+      //__m512i hi = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 1));
+      //__m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0]));
+      //distance = _mm512_mul_ps(distance, scale);
       distance = _mm512_add_ps(distance, _mm512_set1_ps(distanceLUT.totalOffset));
 #if defined(NGTQG_DOT_PRODUCT)
       float one = 1.0;
@@ -1962,6 +2606,9 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance {
       __m256i hih = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16h, 1));
       __m256 distancel = _mm256_cvtepi32_ps(_mm256_add_epi32(lol, hil));
       __m256 distanceh = _mm256_cvtepi32_ps(_mm256_add_epi32(loh, hih));
+      __attribute__((aligned(32))) float v32[8];
+      _mm256_storeu_ps((float*)&v32, distancel);
+      _mm256_storeu_ps((float*)&v32, distanceh);
       __m256 scalel = _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0]));
       __m256 scaleh = _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0]));
       distancel = _mm256_mul_ps(distancel, scalel);
@@ -1974,295 +2621,584 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance {
       distancel = _mm256_mul_ps(_mm256_sub_ps(_mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distancel), _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&two)));
       distanceh = _mm256_mul_ps(_mm256_sub_ps(_mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distanceh), _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&two)));
 #endif
-      distancel = _mm256_sqrt_ps(distancel);
-      distanceh = _mm256_sqrt_ps(distanceh);
-      _mm256_storeu_ps(d, distancel);
-      _mm256_storeu_ps(d + 8, distanceh);
+      distancel = _mm256_sqrt_ps(distancel);
+      distanceh = _mm256_sqrt_ps(distanceh);
+      _mm256_storeu_ps(d, distancel);
+      _mm256_storeu_ps(d + 8, distanceh);
+#endif
+      d += 16;
+    }
+#ifdef NGTQBG_MIN
+    return min;
+#endif
+  }
+#endif /// NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION  ////////////////////////////////////////
+
+#else
+#ifdef NGTQBG_MIN // Scalar (non-SIMD) batch distance over packed 4-bit codes using the uint8 lookup table; with NGTQBG_MIN it also returns the smallest distance written.
+  inline float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query = 0) {
+#else
+  inline void operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query = 0) {
+#endif // `query` is accepted by the signature but never read in this body.
+    uint8_t *localID = static_cast<uint8_t*>(inv); // packed codes: each byte holds two 4-bit lookup indexes
+#ifdef NGTQBG_MIN
+    float min = std::numeric_limits<float>::max();
+#endif
+    size_t numOfAlignedSubvectors = ((localDivisionNo - 1) / NGTQ_BATCH_SIZE + 1) * NGTQ_BATCH_SIZE; // localDivisionNo rounded up to a multiple of NGTQ_BATCH_SIZE
+    size_t alignedSize = ((size - 1) / 2 + 1) * 2; // size rounded up to an even count; NOTE(review): size == 0 wraps around (unsigned) — confirm callers never pass 0
+    uint32_t d[NGTQ_SIMD_BLOCK_SIZE]; // integer accumulators, one per object of the current block
+    size_t didx = 0; // first slot of the current block in `distances`
+    size_t byteSize = numOfAlignedSubvectors * alignedSize / 2; // number of packed bytes to consume
+    auto *last = localID + byteSize;
+    while (localID < last) { // one pass per block of NGTQ_SIMD_BLOCK_SIZE objects
+      uint8_t *lut = distanceLUT.localDistanceLookup; // restart from the first subvector's table
+      memset(d, 0, sizeof(uint32_t) * NGTQ_SIMD_BLOCK_SIZE);
+      for (size_t li = 0; li < numOfAlignedSubvectors; li++) {
+	for (size_t i = 0; i < NGTQ_SIMD_BLOCK_SIZE; i++) {
+	  uint8_t obj = *localID;
+	  if (i % 2 == 0) {
+	    obj &= 0x0f; // even object: low nibble, stay on the same byte
+	  } else {
+	    obj >>= 4; // odd object: high nibble, then advance to the next byte
+	    localID++;
+	  }
+	  d[i] += *(lut + obj);
+	}
+	lut += localCodebookCentroidNo - 1; // step to the next subvector's table (stride is localCodebookCentroidNo - 1)
+      }
+      for (size_t i = 0; i < NGTQ_SIMD_BLOCK_SIZE; i++) { // always writes a full block; NOTE(review): presumably `distances` is padded to a block multiple — confirm
+	distances[didx + i] = sqrt(static_cast<float>(d[i]) * distanceLUT.scales[0] + distanceLUT.totalOffset); // rescale with the single shared scale, add the total offset, take the root
+#ifdef NGTQBG_MIN
+	if (min > distances[didx + i]) {
+	  min = distances[didx + i];
+	}
+#endif
+      }
+      didx += NGTQ_SIMD_BLOCK_SIZE;
+    }
+#ifdef NGTQBG_MIN
+    return min;
+#endif
+  }
+#endif
+
+
+#ifdef NGTQBG_MIN // Multi-query overload (takes queryList): an empty stub in this class — nothing is written to `distances`.
+  inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector<uint32_t> &queryList) {
+    return 0.0; // fixed placeholder result for the NGTQBG_MIN build
+#else
+  inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector<uint32_t> &queryList) {
+#endif
+  }
+
+  inline double operator()(NGT::Object &object, size_t objectID, void *l) { // Distance computed without a lookup table: forwards to getL2DistanceFloat.
+    return getL2DistanceFloat(object, objectID, static_cast<T*>(l)); // `l` is reinterpreted as an array of T
+  }
+  inline double operator()(NGT::Object &object, size_t objectID, void *l, DistanceLookupTable &distanceLUT) { // Distance from `object` to the code (objectID, l); valid distanceLUT entries are reused, the rest are computed from the centroids.
+    T *localID = static_cast<T*>(l); // one local-codebook ID per subvector
+    NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID); // objectID selects the entry in the global codebook repository
+#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
+    float *gcptr = (float*)&gcentroid.at(0, globalCodebookIndex->getObjectSpace().getRepository().allocator);
+#else
+    float *gcptr = (float*)&gcentroid[0];
+#endif
+    float *optr = (float*)&((NGT::Object&)object)[0]; // query object viewed as floats
+    double distance = 0.0;
+    for (size_t li = 0; li < localDivisionNo; li++) {
+      size_t distanceLUTidx = li * localCodebookCentroidNo + localID[li]; // table index: subvector-major, then local centroid ID
+      if (distanceLUT.isValid(distanceLUTidx)) {
+	distance += distanceLUT.getDistance(distanceLUTidx); // reuse the stored partial distance
+	optr += localDataSize; // skip this subvector in the object ...
+	gcptr += localDataSize; // ... and in the global centroid
+      } else { // no valid entry: accumulate ((object - global centroid) - local centroid)^2 over this subvector; the result is not written back to distanceLUT
+        size_t idx = li;
+	NGT::PersistentObject &lcentroid = *localCodebookIndexes[idx].getObjectSpace().getRepository().get(localID[li]);
+#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
+	float *lcptr = (float*)&lcentroid.at(0, localCodebookIndexes[idx].getObjectSpace().getRepository().allocator);
+#else
+	float *lcptr = (float*)&lcentroid[0];
+#endif
+#if defined(NGTQG_AVX512) || defined(NGTQG_AVX2)
+	float *lcendptr = lcptr + localDataSize;
+	__m256 sum256 = _mm256_setzero_ps();
+	__m256 v;
+	while (lcptr < lcendptr) { // 8 floats per step; NOTE(review): presumably localDataSize is a multiple of 8, otherwise the last step reads past the subvector — confirm
+	  v = _mm256_sub_ps(_mm256_sub_ps(_mm256_loadu_ps(optr), _mm256_loadu_ps(gcptr)), _mm256_loadu_ps(lcptr));
+	  sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v, v));
+	  optr += 8;
+	  gcptr += 8;
+	  lcptr += 8;
+	}
+	__m128 sum128 = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1)); // fold the upper half onto the lower half
+	__attribute__((aligned(32))) float f[4];
+	_mm_store_ps(f, sum128);
+	double d = f[0] + f[1] + f[2] + f[3]; // horizontal sum of the four remaining lanes
+#else
+	float *lcendptr = lcptr + localDataSize;
+	double d = 0.0;
+	while (lcptr != lcendptr) { // scalar path: one element per step
+	  double sub = (*optr++ - *gcptr++) - *lcptr++;
+	  d += sub * sub;
+	}
+#endif
+	distance += d;
+      }
+    }
+    return sqrt(distance);	// square root of the sum over all subvectors
+  }
+
+  uint8_t *generateRearrangedObjects(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects) { // Rearranges one inverted-index entry into a uint4-compressed stream (NGTQ_QBG builds only).
+    if (invertedIndexObjects.numOfSubvectors != localDivisionNo) { // the entry must have exactly localDivisionNo subvectors
+      std::stringstream msg;
+      msg << "Internal fatal error. Invalid # of subvectos. " << invertedIndexObjects.numOfSubvectors << ":" << localDivisionNo;
+      NGTThrowException(msg);
+    }
+#ifdef NGTQ_QBG
+    QuantizedObjectProcessingStream quantizedStream(invertedIndexObjects.numOfSubvectors, invertedIndexObjects.size());
+    quantizedStream.arrange(invertedIndexObjects);
+    return quantizedStream.compressIntoUint4(); // NOTE(review): quantizedStream is a local — confirm who owns and frees the returned buffer
+#else
+    return 0; // null pointer when NGTQ_QBG is not defined
+#endif
+  }
+  void restoreIntoInvertedIndex(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects, // Rebuilds an inverted-index entry from a uint4-compressed stream; does nothing unless NGTQ_QBG is defined.
+				size_t numOfSubspaces, std::vector<uint32_t> &ids, void *objects) { // only ids.size() is read from `ids` in this body
+#ifdef NGTQ_QBG
+    NGTQ::QuantizedObjectProcessingStream quantizedStream(numOfSubspaces, ids.size());
+    quantizedStream.uncompressFromUint4(static_cast<uint8_t*>(objects)); // `objects` is treated as the packed uint4 byte stream
+    invertedIndexObjects.initialize(numOfSubspaces);
+    quantizedStream.restoreToInvertedIndex(invertedIndexObjects);
+#endif
+  }
+  size_t getNumOfAlignedObjects(size_t noOfObjects) { // Aligned object count, as defined by QuantizedObjectProcessingStream.
+    return QuantizedObjectProcessingStream::getNumOfAlignedObjects(noOfObjects); // static helper; no stream instance is needed
+  }
+  size_t getSizeOfCluster(size_t noOfObjects) { // Size of the uint4 stream for a cluster of noOfObjects objects, as reported by getUint4StreamSize.
+    QuantizedObjectProcessingStream quantizedStream(localDivisionNo); // temporary stream configured with this quantizer's subvector count
+    return quantizedStream.getUint4StreamSize(noOfObjects);; // the second ';' is a harmless empty statement
+  }
+#endif
+
+};
+
+class NonLocalQuantizedObjectDistance : public QuantizedObjectDistance { // Intermediate base class whose restoreIntoInvertedIndex rebuilds entries from object IDs alone.
+ public:
+  NonLocalQuantizedObjectDistance(Quantizer &q):QuantizedObjectDistance(q){} // forwards the quantizer to the base class
+  virtual void restoreIntoInvertedIndex(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects, // Refills the entry with one QuantizedObject per ID; does nothing unless NGTQ_QBG is defined.
+					size_t numOfSubspaces, std::vector<uint32_t> &ids, void *objects) { // numOfSubspaces and objects are not read here
+#ifdef NGTQ_QBG
+    invertedIndexObjects.initialize(0); // initialized with 0 — presumably "no subspaces" since no local codes are restored; confirm
+    for (auto &id : ids) {
+      NGTQ::QuantizedObject quantizedObject; // only objectID is assigned before the push
+      quantizedObject.objectID = id;
+      invertedIndexObjects.pushBack(id, quantizedObject);
+    }
+#endif
+  }
+};
+
+#ifdef NGT_IVI
+  ///////////////////////////////////////////////
+template <typename T>
+class NonQuantizedObjectDistance : public NonLocalQuantizedObjectDistance {
+public:
+  NonQuantizedObjectDistance(Quantizer &q):NonLocalQuantizedObjectDistance(q){} // forwards the quantizer to the base class; no state of its own
+  inline double operator()(void *l, DistanceLookupTable &distanceLUT) { // Stub overload: ignores both arguments.
+    return 0.0; // fixed placeholder value
+  }
+
+  ///-/ Approximate distance calculation /////////////////////////////
+#ifdef NGTQBG_MIN // Fills distances[0..noOfObjects) with compareL2(query, object idx) over float vectors; with NGTQBG_MIN also returns the minimum.
+  inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query = 0) {
+#else
+  inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query = 0) {
+#endif // distanceLUT is unused here; NOTE(review): `query` defaults to 0 but is passed to compareL2 unchecked — confirm callers always supply it
 #ifdef NGTQBG_MIN
-      {
-	float tmpmin = horizontalMin(distancel, distanceh, lastd - d);
-	if (min > tmpmin) min = tmpmin;
-      }
+    float min = std::numeric_limits<float>::max();
 #endif
+    for (size_t idx = 0; idx < noOfObjects; idx++) {
+      distances[idx] = NGT::PrimitiveComparator::compareL2(static_cast<float*>(query),
+							   static_cast<float*>(inv) + (dimension * idx), // objects are stored back to back, `dimension` floats each
+							   dimension);
+#ifdef NGTQBG_MIN
+      if (distances[idx] < min) min = distances[idx];
 #endif
-      d += 16;
     }
 #ifdef NGTQBG_MIN
     return min;
 #endif
   }
 
-#else /// NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION  ////////////////////////////////////////
-#ifndef NGT_AVX512
-#error "AVX512 is *NOT* defined. *INDIVIDUAL* scale offset compression is available only for AVX512!"
-#endif
 #ifdef NGTQBG_MIN
-  inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT) {
+  inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector<uint32_t> &queryList) { // Multi-query overload: empty stub, writes nothing to `distances`.
+    return 0.0; // fixed placeholder result for the NGTQBG_MIN build
 #else
-  inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT) {
+  inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector<uint32_t> &queryList) { // Multi-query overload: empty stub, writes nothing to `distances`.
 #endif
+  }
 
-    uint8_t *localID = static_cast<uint8_t*>(inv);
-    float *d = distances;
-#ifdef NGTQBG_MIN
-    float *lastd = distances + noOfObjects;
-    float min = std::numeric_limits<float>::max();
-#endif
-#if defined(NGTQG_AVX512)
-    __m512i mask512x0F = _mm512_set1_epi16(0x000f);
-    __m512i mask512xF0 = _mm512_set1_epi16(0x00f0);
-    const size_t range512 = distanceLUT.range512;
-    auto step512 = distanceLUT.step512;
-#endif
-    const __m256i mask256x0F = _mm256_set1_epi16(0x000f);
-    const __m256i mask256xF0 = _mm256_set1_epi16(0x00f0);
-    const size_t range256 = distanceLUT.range256;
-    auto step256 = distanceLUT.step256;
-    auto *last = localID + range256 / NGTQ_SIMD_BLOCK_SIZE * noOfObjects;
-    while (localID < last) {
-      uint8_t *lut = distanceLUT.localDistanceLookup;
-      float *scales = distanceLUT.scales;
-      auto *lastgroup256 = localID + range256;
-      __m512 distance = _mm512_setzero_ps();
-#if defined(NGTQG_AVX512)
-      //__m512i depu16 = _mm512_setzero_si512();
-      auto *lastgroup512 = localID + range512;
-      while (localID < lastgroup512) {
-	__m512i lookupTable = _mm512_loadu_si512((__m512i const*)lut);
-	_mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0);
-	__m512i packedobj = _mm512_cvtepu8_epi16(_mm256_loadu_si256((__m256i const*)&localID[0]));
-	__m512i lo = _mm512_and_si512(packedobj, mask512x0F);
-	__m512i hi = _mm512_slli_epi16(_mm512_and_si512(packedobj, mask512xF0), 4);
-	__m512i obj = _mm512_or_si512(lo, hi);
-	__m512i vtmp = _mm512_shuffle_epi8(lookupTable, obj);
+  inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTable &distanceLUT) { // Unsupported overload for this class: raises "Not implemented.".
+    NGTThrowException("Not implemented.");
+    return 0.0; // presumably unreachable once NGTThrowException throws; kept to satisfy the return type
+  }
 
-	__m512 d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 0)));
-	__m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[0]));
-	distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale));
-	d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 1)));
-	scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[1]));
-	distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale));
-	d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 2)));
-	scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[2]));
-	distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale));
-	d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 3)));
-	scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[3]));
-	distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale));
+  inline double operator()(NGT::Object &object, size_t objectID, void *l) { // Unsupported overload for this class: raises "Not implemented." before any computation.
+    NGTThrowException("Not implemented.");
+    return getL2DistanceFloat(object, objectID, static_cast<T*>(l)); // only evaluated if NGTThrowException returns — presumably never; confirm
+  }
+  inline double operator()(NGT::Object &object, size_t objectID, void *l, DistanceLookupTable &distanceLUT) { // Unsupported overload for this class: raises "Not implemented.".
+    NGTThrowException("Not implemented.");
+    return 0.0; // presumably unreachable once NGTThrowException throws
+  }
+  void createDistanceLookup(NGT::Object &object, size_t objectID, DistanceLookupTable &distanceLUT) { // Convenience overload taking an NGT::Object.
+    void *objectPtr = &((NGT::Object&)object)[0]; // address of the object's first element
+    createDistanceLookup(objectPtr, objectID, distanceLUT); // forwards to the void* overload
+  }
 
-	lut += (localCodebookCentroidNo - 1) * 4;
-	scales += 4;
-	localID += step512;
-      }
-#else
-      __m256i depu16l = _mm256_setzero_si256();
-      __m256i depu16h = _mm256_setzero_si256();
+  void createDistanceLookup(void *objectPtr, size_t objectID, DistanceLookupTable &distanceLUT) { // No-op: distanceLUT is left untouched.
+  }
+
+  void createDistanceLookup(void *objectPtr, size_t objectID, DistanceLookupTableUint8 &distanceLUT) { // No-op for the uint8 table as well.
+  }
+
+  uint8_t *generateRearrangedObjects(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects) { // Arranges one inverted-index entry into an ObjectProcessingStream<T> and returns its stream pointer.
+    if (invertedIndexObjects.numOfSubvectors != localDivisionNo) { // the entry must have exactly localDivisionNo subvectors
+      std::stringstream msg;
+      msg << "Internal fatal error. Invalid # of subvectos. " << invertedIndexObjects.numOfSubvectors << ":" << localDivisionNo;
+      NGTThrowException(msg);
+    }
+    ObjectProcessingStream<T> processingStream(invertedIndexObjects.numOfSubvectors, invertedIndexObjects.size(), quantizer);
+    processingStream.arrange(invertedIndexObjects);
+    return processingStream.getStream(); // NOTE(review): processingStream is a local — confirm who owns the returned buffer after it goes out of scope
+  }
+  void restoreIntoInvertedIndex(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects, // Refills the entry with one QuantizedObject per ID; does nothing unless NGTQ_QBG is defined.
+				size_t numOfSubspaces, std::vector<uint32_t> &ids, void *objects) { // numOfSubspaces and objects are not read here
+#ifdef NGTQ_QBG
+    invertedIndexObjects.initialize(0); // initialized with 0 — presumably "no subspaces" since no local codes are restored; confirm
+    for (auto id : ids) {
+      NGTQ::QuantizedObject quantizedObject; // only objectID is assigned before the push
+      quantizedObject.objectID = id;
+      invertedIndexObjects.pushBack(id, quantizedObject);
+    }
 #endif
-      while (localID < lastgroup256) {
-	__m256i lookupTable = _mm256_loadu_si256((__m256i const*)lut);
-	_mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0);
-	//std::cerr << "obj=" << (int)(localID[0] & 0x0f) << "," << (int)((localID[0] >> 4) & 0x0f) << std::endl;
-	__m256i packedobj = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)&localID[0]));
-	__m256i lo = _mm256_and_si256(packedobj, mask256x0F);
-	__m256i hi = _mm256_slli_epi16(_mm256_and_si256(packedobj, mask256xF0), 4);
-	__m256i obj = _mm256_or_si256(lo, hi);
-	//std::cerr << "LUT=" << (int)*lut << "," << (int)*(lut+1) << std::endl;
-	__m256i vtmp = _mm256_shuffle_epi8(lookupTable, obj);
+  }
+  size_t getNumOfAlignedObjects(size_t noOfObjects) { return noOfObjects; } // no padding in this class: the count is returned unchanged
+  size_t getSizeOfCluster(size_t noOfObjects) { // Stream size for a cluster of noOfObjects objects, as reported by ObjectProcessingStream<T>::getStreamSize.
+    ObjectProcessingStream<T> processingStream(localDivisionNo); // temporary stream configured with this quantizer's subvector count
+    return processingStream.getStreamSize(noOfObjects);; // the second ';' is a harmless empty statement
+  }
+};
 
-#if defined(NGTQG_AVX512)
-	__m512 d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm256_extracti32x4_epi32(vtmp, 0)));
-	__m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[0]));
-	distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale));
-	d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm256_extracti32x4_epi32(vtmp, 1)));
-	scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[1]));
-	distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale));
-	////////////////////
+template <typename QT, typename OT>
+class ScalarQuantizedInt8ObjectDistance : public NonLocalQuantizedObjectDistance {
+ public:
+  ScalarQuantizedInt8ObjectDistance(Quantizer &q, DistanceType dt):NonLocalQuantizedObjectDistance(q) { // Binds the comparison routine for `dt` at construction time.
+    setCompareFunction(dt); // raises via NGTThrowException for distance types other than L2 / inner product / normalized cosine
+  }
+  ~ScalarQuantizedInt8ObjectDistance() {} // empty destructor body: nothing is released here
+
+  inline double operator()(void *l, DistanceLookupTable &distanceLUT) { // Stub overload: ignores both arguments.
+    return 0.0; // fixed placeholder value
+  }
+
+  ///-/ Approximate distance calculation /////////////////////////////
+#ifdef NGTQBG_MIN // Single-query batch: distances[idx] = compare(query, object idx) for entries that arrive as 0; with NGTQBG_MIN also returns the minimum computed.
+  inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query) {
 #else
-	depu16l = _mm256_adds_epu16(depu16l, _mm256_cvtepu8_epi16(_mm256_extractf128_si256(vtmp, 0)));
-	depu16h = _mm256_adds_epu16(depu16h, _mm256_cvtepu8_epi16(_mm256_extractf128_si256(vtmp, 1)));
+  inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query) {
 #endif
-	lut += (localCodebookCentroidNo - 1) * 2;
-	scales += 2;
-	localID += step256;
+    if (query == 0) { // a null query cannot be compared; distanceLUT is not used by this overload
+      NGTThrowException("Fatal inner error! The specified query is invalid..");
+    }
+#ifdef NGTQBG_MIN
+    float min = std::numeric_limits<float>::max();
+#endif
+    for (size_t idx = 0; idx < noOfObjects; idx++) {
+      if (distances[idx] != 0.0) { // `distances` doubles as an input mask: non-zero entries are skipped
+	distances[idx] = std::numeric_limits<float>::max(); // skipped objects are reported as the largest float
+	continue;
       }
-
-#if defined(NGTQG_AVX512)
-      //__m512i lo = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 0));
-      //__m512i hi = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 1));
-      //__m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0]));
-      //distance = _mm512_mul_ps(distance, scale);
-      distance = _mm512_add_ps(distance, _mm512_set1_ps(distanceLUT.totalOffset));
-#if defined(NGTQG_DOT_PRODUCT)
-      float one = 1.0;
-      float two = 2.0;
-      distance = _mm512_mul_ps(_mm512_sub_ps(_mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distance), _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&two)));
+      distances[idx] = compare(query, static_cast<NGT::quint8*>(inv) + (dimension * idx), dimension); // objects are stored back to back, `dimension` quint8 elements each
+#ifdef NGTQBG_MIN
+      if (distances[idx] < min) min = distances[idx]; // skipped objects never contribute to the minimum
 #endif
-      distance = _mm512_sqrt_ps(distance);
-      _mm512_storeu_ps(d, distance);
+    }
 #ifdef NGTQBG_MIN
-      {
-	float tmpmin;
-	int rest = 16 - (lastd - d);
-	if (rest > 0) {
-	  __mmask16 mask = 0xffff;
-	  mask >>= rest;
-	  tmpmin = _mm512_mask_reduce_min_ps(mask, distance);
-	} else {
-	  tmpmin = _mm512_reduce_min_ps(distance);
-	}
-	//std::cerr << "tmpmin=" << tmpmin << std::endl;
-	if (min > tmpmin) min = tmpmin;
-      }
+    return min;
 #endif
+  }
+
+#ifdef NGTQBG_MIN // Multi-query batch: for every qi in queryList and every object idx, distances[noOfObjects * qi + idx] = compare(query row queryList[qi], object idx).
+  inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector<uint32_t> &queryList) {
 #else
-      __m256i lol = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16l, 0));
-      __m256i loh = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16l, 1));
-      __m256i hil = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16h, 0));
-      __m256i hih = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16h, 1));
-      __m256 distancel = _mm256_cvtepi32_ps(_mm256_add_epi32(lol, hil));
-      __m256 distanceh = _mm256_cvtepi32_ps(_mm256_add_epi32(loh, hih));
-      __attribute__((aligned(32))) float v32[8];
-      _mm256_storeu_ps((float*)&v32, distancel);
-      _mm256_storeu_ps((float*)&v32, distanceh);
-      __m256 scalel = _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0]));
-      __m256 scaleh = _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0]));
-      distancel = _mm256_mul_ps(distancel, scalel);
-      distancel = _mm256_add_ps(distancel, _mm256_set1_ps(distanceLUT.totalOffset));
-      distanceh = _mm256_mul_ps(distanceh, scaleh);
-      distanceh = _mm256_add_ps(distanceh, _mm256_set1_ps(distanceLUT.totalOffset));
-#if defined(NGTQG_DOT_PRODUCT)
-      float one = 1.0;
-      float two = 2.0;
-      distancel = _mm256_mul_ps(_mm256_sub_ps(_mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distancel), _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&two)));
-      distanceh = _mm256_mul_ps(_mm256_sub_ps(_mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distanceh), _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&two)));
+  inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector<uint32_t> &queryList) {
 #endif
-      distancel = _mm256_sqrt_ps(distancel);
-      distanceh = _mm256_sqrt_ps(distanceh);
-      _mm256_storeu_ps(d, distancel);
-      _mm256_storeu_ps(d + 8, distanceh);
+    if (query == 0) { // a null query buffer cannot be indexed; distanceLUT is not used by this overload
+      NGTThrowException("Fatal inner error! The specified query is invalid..");
+    }
+#if 0
+    std::cerr << "q size=" << queryList.size() << std::endl;
+    std::cerr << "qs=";
+    for (size_t i = 0; i < queryList.size(); i++) {
+      std::cerr << queryList[i] << " ";
+    }
+    std::cerr << std::endl;
 #endif
-      d += 16;
+#ifdef NGTQBG_MIN
+    float min = std::numeric_limits<float>::max(); // NOTE(review): never lowered in this overload, so the returned value stays at the float maximum — confirm callers do not rely on it
+#endif
+#define DIST1 // selects the blocked implementation below; the macro stays defined after this function
+#if defined(DIST0)
+    for (size_t qi = 0; qi < queryList.size(); qi++) { // straightforward variant (not compiled while only DIST1 is defined)
+      for (size_t idx = 0; idx < noOfObjects; idx++) {
+	auto *q = static_cast<uint8_t*>(query) + dimension * queryList[qi];
+	auto *o = static_cast<uint8_t*>(inv) + dimension * idx;
+	auto d = NGT::PrimitiveComparator::compareL2(reinterpret_cast<NGT::quint8*>(q),
+						     reinterpret_cast<NGT::quint8*>(o),
+						     dimension);
+	distances[noOfObjects * qi + idx] = d;
+      }
+    }
+#elif defined(DIST1)
+    size_t bsize = std::max<size_t>(1, static_cast<size_t>(192 * 1024 * 0.5 / dimension)); // objects per block (presumably sized to half of a 192 KiB cache — confirm); at least 1
+    // Clamp rationale: for dimension > 98304 the quotient truncates to 0 and `bi += bsize` would never advance (endless loop).
+    for (size_t bi = 0; bi < noOfObjects; bi += bsize) { // walk the objects block by block so every query reuses the same block
+      for (size_t qi = 0; qi < queryList.size(); qi++) {
+	if (qi + 1 < queryList.size()) {
+	  NGT::MemoryCache::prefetch(static_cast<uint8_t*>(query) + dimension * queryList[qi + 1], 64); // prefetch the next query row while this one is processed
+	}
+	for (size_t idx = bi; idx < std::min(noOfObjects, bi + bsize); idx++) {
+	  auto *q = static_cast<uint8_t*>(query) + dimension * queryList[qi]; // query rows are `dimension` bytes each, selected by queryList
+	  auto *o = static_cast<uint8_t*>(inv) + dimension * idx; // objects are `dimension` bytes each, stored back to back
+	  distances[noOfObjects * qi + idx] = compare(q, o, dimension); // result matrix is query-major: one row of noOfObjects per query
+	}
+      }
     }
+#endif
 #ifdef NGTQBG_MIN
     return min;
 #endif
   }
-#endif /// NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION  ////////////////////////////////////////
 
-#else
+  inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTable &distanceLUT) { // Unsupported overload for this class: raises "Not implemented.".
+    NGTThrowException("Not implemented.");
+    return 0.0; // presumably unreachable once NGTThrowException throws; kept to satisfy the return type
+  }
+
+  inline double operator()(NGT::Object &object, size_t objectID, void *l) { // Unsupported overload for this class: raises "Not implemented.".
+    NGTThrowException("Not implemented.");
+    //return getL2DistanceFloat(object, objectID, static_cast<T*>(l));
+    return 0.0; // presumably unreachable once NGTThrowException throws
+  }
+  inline double operator()(NGT::Object &object, size_t objectID, void *l, DistanceLookupTable &distanceLUT) { // Unsupported overload for this class: raises "Not implemented.".
+    NGTThrowException("Not implemented.");
+    return 0.0; // presumably unreachable once NGTThrowException throws
+  }
+
+  void createDistanceLookup(NGT::Object &object, size_t objectID, DistanceLookupTable &distanceLUT) {} // no-op: this class's distance operators do not read a lookup table
+  void createDistanceLookup(void *objectPtr, size_t objectID, DistanceLookupTable &distanceLUT) {} // no-op (raw-pointer overload)
+  void createDistanceLookup(void *objectPtr, size_t objectID, DistanceLookupTableUint8 &distanceLUT) {} // no-op (uint8 table overload)
+
+  uint8_t *generateRearrangedObjects(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects) { // Arranges one inverted-index entry into a ScalarQuantizedInt8ObjectProcessingStream and returns its stream pointer.
+    //ScalarQuantizedInt8ObjectProcessingStream processingStream(localDivisionNo, invertedIndexObjects.size(), &typeid(OT), quantizer);
+    ScalarQuantizedInt8ObjectProcessingStream processingStream(localDivisionNo, invertedIndexObjects.size(), &typeid(QT), quantizer); // the stream receives typeid(QT); the OT variant above is disabled
+    processingStream.arrange(invertedIndexObjects);
+    return processingStream.getStream(); // NOTE(review): processingStream is a local — confirm who owns the returned buffer after it goes out of scope
+  }
+
+  size_t getNumOfAlignedObjects(size_t noOfObjects) { return noOfObjects; } // no padding in this class: the count is returned unchanged
+  size_t getSizeOfCluster(size_t noOfObjects) { // Stream size for a cluster of noOfObjects objects, as reported by getStreamSize.
+    ScalarQuantizedInt8ObjectProcessingStream processingStream(localDivisionNo); // temporary stream configured with localDivisionNo
+    return processingStream.getStreamSize(noOfObjects);; // the second ';' is a harmless empty statement
+  }
+
+  float compareDotProduct(void *q, void *o, size_t dimension) { // Turns a dot product into a value that shrinks as the dot product grows.
+    double maxmag = 255.0 * 255.0 * dimension; // constant subtracted from; presumably the largest dot product of two 8-bit vectors (255 * 255 per element) — confirm for signed types
+    double d = NGT::PrimitiveComparator::compareDotProduct(reinterpret_cast<QT*>(q), reinterpret_cast<OT*>(o), dimension); // q is read as QT elements, o as OT elements
+    return maxmag - d; // computed in double, narrowed to float on return
+  }
+
+  float compareL2(void *q, void *o, size_t dimension) { // L2 comparison of a QT query against an OT object via NGT::PrimitiveComparator.
+    return NGT::PrimitiveComparator::compareL2(reinterpret_cast<QT*>(q), reinterpret_cast<OT*>(o), dimension); // q is read as QT elements, o as OT elements
+  }
+
+  float compare(void *q, void *o, size_t dimension) { // Dispatches to the routine chosen by setCompareFunction.
+    return (this->*comparePtr)(q, o, dimension); // comparePtr must have been set (the constructor does this)
+  }
+
+  void setCompareFunction(DistanceType dtype) { // Selects the member function that compare() will call for the given distance type.
+    switch (dtype) {
+    case DistanceType::DistanceTypeInnerProduct: // inner product and normalized cosine share the dot-product routine
+    case DistanceType::DistanceTypeNormalizedCosine:
+      comparePtr = &ScalarQuantizedInt8ObjectDistance<QT, OT>::compareDotProduct; break;
+    case DistanceType::DistanceTypeL2:
+      comparePtr = &ScalarQuantizedInt8ObjectDistance<QT, OT>::compareL2; break;
+    default: // any other distance type is rejected; comparePtr is left unassigned in that case
+      {
+	std::stringstream msg;
+	msg << "Invalid distance type. " << dtype;
+	NGTThrowException(msg);
+	break;
+      }
+    }
+  }
+
+  float (ScalarQuantizedInt8ObjectDistance<QT, OT>::*comparePtr)(void *, void*, size_t); // pointer to the member comparison routine; assigned in setCompareFunction, invoked by compare
+  //float (*comparePtr)(void *, void*, size_t);
+
+};
+
+class ScalarQuantizedUint8TransposedObjectDistance : public ScalarQuantizedInt8ObjectDistance<NGT::quint8, NGT::quint8> {
+ public:
+  ScalarQuantizedUint8TransposedObjectDistance(Quantizer &q, DistanceType dtype):ScalarQuantizedInt8ObjectDistance<NGT::quint8, NGT::quint8>(q, dtype) {}
+  ~ScalarQuantizedUint8TransposedObjectDistance() {}
+
 #ifdef NGTQBG_MIN
-  inline float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT) {
+  inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query) {
 #else
-  inline void operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT) {
+  inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query) {
 #endif
-    uint8_t *localID = static_cast<uint8_t*>(inv);
-#ifdef NGTQBG_MIN
-    float min = std::numeric_limits<float>::max();
+    if (query == 0) {
+      NGTThrowException("Fatal inner error! The specified query is invalid..");
+    }
+    const uint8_t *object = static_cast<uint8_t*>(inv);
+    const uint8_t *qobject = static_cast<uint8_t*>(query);
+    const unsigned char *last = qobject + dimension;
+    __m256i sum256[noOfObjects];
+#if defined(NGT_AVX512)
+    {
+      __m512i sum512[noOfObjects];
+      for (size_t oi = 0; oi < noOfObjects; oi++) {
+	sum512[oi] = _mm512_setzero_si512();
+      }
+      const unsigned char *lastgroup = last - 63;
+      while (qobject < lastgroup) {
+	__m512i q = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(qobject));
+	for (size_t oi = 0; oi < noOfObjects; oi++) {
+	  __m512i o = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(object));
+	  __mmask64 m = _mm512_cmplt_epu8_mask(q, o);
+	  __m512i x = _mm512_add_epi8(_mm512_maskz_subs_epu8(m, o, q),
+				      _mm512_maskz_subs_epu8(~m, q, o));
+	  __m512i xi16 = _mm512_cvtepu8_epi16(_mm512_extracti32x8_epi32(x,0));
+	  sum512[oi] = _mm512_add_epi32(sum512[oi], _mm512_madd_epi16(xi16, xi16));
+	  xi16 = _mm512_cvtepu8_epi16(_mm512_extracti32x8_epi32(x,1));
+	  sum512[oi] = _mm512_add_epi32(sum512[oi], _mm512_madd_epi16(xi16, xi16));
+	  object += 64;
+        }
+        qobject += 64;
+      }
+      for (size_t oi = 0; oi < noOfObjects; oi++) {
+	sum256[oi] = _mm256_add_epi32(_mm512_extracti32x8_epi32(sum512[oi], 0),
+				      _mm512_extracti32x8_epi32(sum512[oi], 1));
+      }
+    }
+#elif defined(NGT_AVX2)
+    {
+      for (size_t oi = 0; oi < noOfObjects; oi++) {
+	sum256[oi] = _mm256_setzero_si256();
+      }
+      //__attribute__((aligned(32))) uint16_t iv[16];
+      const unsigned char *lastgroup = last - 63;
+      while (qobject < lastgroup) {
+	//std::cerr << "qobj=" << (size_t)qobject << ":" << (size_t)lastgroup << std::endl;
+	__m256i q[4];
+	q[0] = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)qobject));
+	qobject += 16;
+	q[1] = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)qobject));
+	qobject += 16;
+	q[2] = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)qobject));
+	qobject += 16;
+	q[3] = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)qobject));
+	qobject += 16;
+	for (size_t oi = 0; oi < noOfObjects; oi++) {
+	  __m256i o = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)object));
+	  __m256i sub16 = _mm256_subs_epi16(q[0], o);
+	  sum256[oi] = _mm256_add_epi32(sum256[oi], _mm256_madd_epi16(sub16, sub16));
+	  object += 16;
+	  o = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)object));
+	  sub16 = _mm256_subs_epi16(q[1], o);
+	  sum256[oi] = _mm256_add_epi32(sum256[oi], _mm256_madd_epi16(sub16, sub16));
+	  object += 16;
+	  o = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)object));
+	  sub16 = _mm256_subs_epi16(q[2], o);
+	  sum256[oi] = _mm256_add_epi32(sum256[oi], _mm256_madd_epi16(sub16, sub16));
+	  object += 16;
+	  o = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)object));
+	  sub16 = _mm256_subs_epi16(q[3], o);
+	  sum256[oi] = _mm256_add_epi32(sum256[oi], _mm256_madd_epi16(sub16, sub16));
+	  object += 16;
+	}
+      }
+    }
 #endif
-    size_t numOfAlignedSubvectors = ((localDivisionNo - 1) / NGTQ_BATCH_SIZE + 1) * NGTQ_BATCH_SIZE;
-    size_t alignedSize = ((size - 1) / 2 + 1) * 2;
-    uint32_t d[NGTQ_SIMD_BLOCK_SIZE];
-    size_t didx = 0;
-    size_t byteSize = numOfAlignedSubvectors * alignedSize / 2;
-    auto *last = localID + byteSize;
-    while (localID < last) {
-      uint8_t *lut = distanceLUT.localDistanceLookup;
-      memset(d, 0, sizeof(uint32_t) * NGTQ_SIMD_BLOCK_SIZE);
-      for (size_t li = 0; li < numOfAlignedSubvectors; li++) {
-	for (size_t i = 0; i < NGTQ_SIMD_BLOCK_SIZE; i++) {
-	  uint8_t obj = *localID;
-	  if (i % 2 == 0) {
-	    obj &= 0x0f;
-	  } else {
-	    obj >>= 4;
-	    localID++;
-	  }
-	  d[i] += *(lut + obj);
+    {
+      //__attribute__((aligned(32))) uint16_t iv[16];
+      const unsigned char *lastgroup = last - 15;
+      while (qobject < lastgroup) {
+	__m256i q = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)qobject));
+	for (size_t oi = 0; oi < noOfObjects; oi++) {
+	  __m256i o = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)object));
+	  __m256i sub16 = _mm256_subs_epi16(q, o);
+	  sum256[oi] = _mm256_add_epi32(sum256[oi], _mm256_madd_epi16(sub16, sub16));
+	  object += 16;
 	}
-	lut += localCodebookCentroidNo - 1;
+	qobject += 16;
       }
-      for (size_t i = 0; i < NGTQ_SIMD_BLOCK_SIZE; i++) {
-	distances[didx + i] = sqrt(static_cast<float>(d[i]) * distanceLUT.scales[0] + distanceLUT.totalOffset);
+    }
 #ifdef NGTQBG_MIN
-	if (min > distances[didx + i]) {
-	  min = distances[didx + i];
-	}
+    float min = std::numeric_limits<float>::max();
 #endif
+    const __m256i value0 = _mm256_set1_epi32(0);
+    for (size_t oi = 0; oi < noOfObjects; oi++) {
+      __m256i tmp1 = _mm256_hadd_epi32(sum256[oi], value0);
+      __m256i tmp2 = _mm256_hadd_epi32(tmp1, value0);
+      distances[oi] = _mm256_extract_epi32(tmp2, 0) + _mm256_extract_epi32(tmp2, 4);
+      if (distances[oi] < min) {
+	min = distances[oi];
       }
-      didx += NGTQ_SIMD_BLOCK_SIZE;
     }
 #ifdef NGTQBG_MIN
     return min;
 #endif
   }
-#endif
-
 
-  inline double operator()(NGT::Object &object, size_t objectID, void *l) {
-    return getL2DistanceFloat(object, objectID, static_cast<T*>(l));
-  }
-  inline double operator()(NGT::Object &object, size_t objectID, void *l, DistanceLookupTable &distanceLUT) {
-    T *localID = static_cast<T*>(l);
-    NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID);
-#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
-    float *gcptr = (float*)&gcentroid.at(0, globalCodebookIndex->getObjectSpace().getRepository().allocator);
-#else
-    float *gcptr = (float*)&gcentroid[0];
-#endif
-    float *optr = (float*)&((NGT::Object&)object)[0];
-    double distance = 0.0;
-    for (size_t li = 0; li < localDivisionNo; li++) {
-      size_t distanceLUTidx = li * localCodebookCentroidNo + localID[li];
-      if (distanceLUT.isValid(distanceLUTidx)) {
-	distance += distanceLUT.getDistance(distanceLUTidx);
-	optr += localDataSize;
-	gcptr += localDataSize;
-      } else {
-        size_t idx = li;
-	NGT::PersistentObject &lcentroid = *localCodebookIndexes[idx].getObjectSpace().getRepository().get(localID[li]);
-#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
-	float *lcptr = (float*)&lcentroid.at(0, localCodebookIndexes[idx].getObjectSpace().getRepository().allocator);
-#else
-	float *lcptr = (float*)&lcentroid[0];
-#endif
-#if defined(NGTQG_AVX512) || defined(NGTQG_AVX2)
-	float *lcendptr = lcptr + localDataSize;
-	__m256 sum256 = _mm256_setzero_ps();
-	__m256 v;
-	while (lcptr < lcendptr) {
-	  v = _mm256_sub_ps(_mm256_sub_ps(_mm256_loadu_ps(optr), _mm256_loadu_ps(gcptr)), _mm256_loadu_ps(lcptr));
-	  sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v, v));
-	  optr += 8;
-	  gcptr += 8;
-	  lcptr += 8;
-	}
-	__m128 sum128 = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1));
-	__attribute__((aligned(32))) float f[4];
-	_mm_store_ps(f, sum128);
-	double d = f[0] + f[1] + f[2] + f[3];
+#ifdef NGTQBG_MIN
+  inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector<uint32_t> &queryList) {
+    return 0.0; // stub: no argument is read and distances is left untouched
 #else
-	float *lcendptr = lcptr + localDataSize;
-	double d = 0.0;
-	while (lcptr != lcendptr) {
-	  double sub = (*optr++ - *gcptr++) - *lcptr++;
-	  d += sub * sub;
-	}
+  inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector<uint32_t> &queryList) {
 #endif
-	distance += d;
-      }
+  } // closes the LUT/query-list overload; its body is empty when NGTQBG_MIN is undefined
+
+  uint8_t *generateRearrangedObjects(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects) { // builds a processing stream from an inverted-index entry and returns its stream pointer
+    if (invertedIndexObjects.numOfSubvectors != localDivisionNo) { // reject entries whose subvector count differs from localDivisionNo
+      std::stringstream msg;
+      msg << "Internal fatal error. Invalid # of subvectos. " << invertedIndexObjects.numOfSubvectors << ":" << localDivisionNo;
+      NGTThrowException(msg);
     }
-    return sqrt(distance);
+    ScalarQuantizedUint8TransposedObjectProcessingStream processingStream(invertedIndexObjects.numOfSubvectors, invertedIndexObjects.size(), quantizer); // constructed from the entry's subvector count, object count and the quantizer
+    processingStream.arrange(invertedIndexObjects);
+    return processingStream.getStream(); // NOTE(review): presumably hands the buffer over to the caller -- confirm ownership in getStream()
+  }
+  void restoreIntoInvertedIndex(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects,
+				size_t numOfSubspaces, std::vector<uint32_t> &ids, void *objects) { // none of the arguments is used
+    NGTThrowException("not implemented"); // unconditional: this operation is not provided by this distance class
+  }
+  size_t getSizeOfCluster(size_t noOfObjects) { // returns the stream size the processing stream reports for noOfObjects objects
+    ScalarQuantizedUint8TransposedObjectProcessingStream processingStream(localDivisionNo); // temporary constructed from localDivisionNo only
+    return processingStream.getStreamSize(noOfObjects);
   }
-#endif
 
 };
 
+#endif
 
 class Quantizer {
 public:
 #ifdef NGTQ_STATIC_OBJECT_FILE
-  typedef StaticObjectFile<NGT::Object>	ObjectList;
+  typedef StaticObjectFile<NGT::Object>	ObjectList;	
 #else
-  typedef ObjectFile	ObjectList;
+  typedef ObjectFile	ObjectList;	
 #endif
 
 
@@ -2290,16 +3226,16 @@ class Quantizer {
 #ifdef NGTQ_QBG
   virtual void createIndex(size_t beginID, size_t endID) = 0;
 #endif
-  virtual void setupInvertedIndex(std::vector<std::vector<float>> &quantizationCodebook,
-					std::vector<uint32_t> &codebookIndex,
-					std::vector<uint32_t> &objectIndex) = 0;
+  virtual void setupInvertedIndex(std::vector<uint32_t> &codebookIndex,
+				  std::vector<std::vector<uint32_t>> &objectIndex) = 0;
 #ifndef NGTQ_QBG
   virtual void rebuildIndex() = 0;
 #endif
   virtual void save() = 0;
   virtual void loadQuantizationCodebookAndRotation(const  std::vector<std::vector<float>> &quantizationCodebook, const std::vector<float> &rotation) = 0;
-  virtual void open(const string &index, NGT::Property &globalProperty, bool readOnly) = 0;
-  virtual void open(const string &index, bool readOnly) = 0;
+  virtual void open(const string &index, NGT::Property &globalProperty, bool readOnly,
+		    DataType refinementDataType = DataTypeAny) = 0;
+  virtual void open(const string &index, bool readOnly, DataType refinementDataType = DataTypeAny) = 0;
   virtual void close() = 0;
   virtual void closeCodebooks() = 0;
 #ifdef NGTQ_SHARED_INVERTED_INDEX
@@ -2380,7 +3316,6 @@ class Quantizer {
 #endif
   virtual size_t getInvertedIndexSize() = 0;
 
-  //void searchIndex(NGT::GraphAndTreeIndex &codebook,
   static void searchIndex(NGT::GraphAndTreeIndex &globalCodebookIndex,
 #ifdef NGTQ_VECTOR_OBJECT
 		   const vector<pair<std::vector<float>, size_t>> &objects,
@@ -2391,216 +3326,73 @@ class Quantizer {
   {
     ids.clear();
     ids.resize(objects.size());
-#pragma omp parallel for
-    for (size_t idx = 0; idx < objects.size(); idx++) {
-#ifdef NGTQ_VECTOR_OBJECT
-      auto *object = globalCodebookIndex.allocateObject(objects[idx].first);
-      globalCodebookIndex.deleteObject(object);
-#else
-#endif
-      NGT::ObjectDistances result;
-#define QID_WEIGHT	100
-      {
-#ifdef NGTQ_VECTOR_OBJECT
-	auto *object = globalCodebookIndex.allocateObject(objects[idx].first);
-	NGT::SearchContainer sc(*object);
-#else
-	NGT::SearchContainer sc(*objects[idx].first);
-#endif
-	sc.setResults(&result);
-	sc.setSize(10);
-	sc.radius = FLT_MAX;
-	sc.setEpsilon(0.1);
-	globalCodebookIndex.search(sc);
-#ifdef NGTQ_VECTOR_OBJECT
-	globalCodebookIndex.deleteObject(object);
-#endif
-      }
-      ids[idx].id = result[0].id;
-      ids[idx].distance = result[0].distance;
-      ids[idx].identical = true;
-    }
-    return;
-  }
-
-  static const std::string getInvertedIndexFile() { return "ivt"; }
-  static const std::string getGlobalFile() { return "global"; }
-  static const std::string getLocalPrefix() { return "local-"; }
-  static const std::string getRotationFile() { return "qr"; }
-  static const std::string getGlobalToInvertedIndexFile() { return "g2i"; }
-
-  ObjectList	objectList;
-
-  string	rootDirectory;
-
-  Property	property;
-
-  NGT::Index	globalCodebookIndex;
-
-  size_t	distanceComputationCount;
-
-  size_t	localIDByteSize;
-  NGT::ObjectSpace::ObjectType objectType;
-  size_t	divisionNo;
-
-  std::vector<NGT::Index>	localCodebookIndexes;
-
-  QuantizationCodebook<float>	quantizationCodebook;
-  std::vector<uint32_t>		objectToBlobIndex;
-  Rotation			rotation;
-
-#ifdef NGTQ_OBJECT_IN_MEMORY
-  NGT::Repository<NGT::Object>	objectListOnMemory;
-#endif
-};
-
-class QuantizedObjectProcessingStream {
- public:
-  QuantizedObjectProcessingStream(size_t divisionNo, size_t nOfObjects): stream(0) {
-    initialize(divisionNo);
-    numOfObjects = nOfObjects;
-    setStreamSize();
-    stream = new uint8_t[streamSize]();
-  }
-
-  QuantizedObjectProcessingStream(size_t numOfSubspaces): stream(0) {
-    initialize(numOfSubspaces);
-  }
-
-  ~QuantizedObjectProcessingStream() {
-    delete[] stream;
-  }
-
-  void initialize(size_t divisionNo) {
-    numOfAlignedSubvectors = ((divisionNo - 1) / NGTQ_BATCH_SIZE + 1) * NGTQ_BATCH_SIZE;
-    alignedBlockSize = NGTQ_SIMD_BLOCK_SIZE * numOfAlignedSubvectors;
-  }
-
-  static size_t getNumOfAlignedObjects(size_t numOfObjects) {
-    return (((numOfObjects - 1) / NGTQ_SIMD_BLOCK_SIZE + 1) * NGTQ_SIMD_BLOCK_SIZE);
-  }
-  
-  void setStreamSize() {
-    numOfAlignedObjects  = getNumOfAlignedObjects(numOfObjects);
-    streamSize = numOfAlignedObjects * numOfAlignedSubvectors;
-    return;
-  }
-
-#ifdef NGTQ_QBG
-  void arrangeQuantizedObject(size_t dataNo, size_t subvectorNo, uint8_t quantizedObject) {
-#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
-    abort();
-#else
-    size_t blkNo = dataNo / NGTQ_SIMD_BLOCK_SIZE;
-    size_t oft = dataNo - blkNo * NGTQ_SIMD_BLOCK_SIZE;
-    stream[blkNo * alignedBlockSize + NGTQ_SIMD_BLOCK_SIZE * subvectorNo + oft] = quantizedObject;
-#endif
-  }
-
-  void arrange(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects) {
-    for (size_t oidx = 0; oidx < invertedIndexObjects.size(); oidx++) {
-      for (size_t idx = 0; idx < invertedIndexObjects.numOfSubvectors; idx++) {
-	arrangeQuantizedObject(oidx, idx, invertedIndexObjects[oidx].localID[idx] - 1);
-      }
-    }
-  }
-
-  uint8_t getQuantizedObject(size_t dataNo, size_t subvectorNo) {
-    size_t blkNo = dataNo / NGTQ_SIMD_BLOCK_SIZE;
-    size_t oft = dataNo - blkNo * NGTQ_SIMD_BLOCK_SIZE;
-    return stream[blkNo * alignedBlockSize + NGTQ_SIMD_BLOCK_SIZE * subvectorNo + oft];
-  }
-#endif
-
-  uint8_t* compressIntoUint4() {
-    size_t idx = 0;
-    size_t uint4StreamSize = streamSize / 2;
-    uint8_t *uint4Objects = new uint8_t[uint4StreamSize]();
-    while (idx < streamSize) {
-      for (size_t lidx = 0; lidx < numOfAlignedSubvectors; lidx++) {
-	for (size_t bidx = 0; bidx < NGTQ_SIMD_BLOCK_SIZE; bidx++) {
-	  if (idx / 2 > uint4StreamSize) {
-	    std::stringstream msg;
-	    msg << "Quantizer::compressIntoUint4: Fatal inner error! " << (idx / 2) << ":" << uint4StreamSize;
-	    NGTThrowException(msg);
-	  }
-	  if (idx % 2 == 0) {
-	    uint4Objects[idx / 2] = stream[idx];
-	  } else {
-	    uint4Objects[idx / 2] |= (stream[idx] << 4);
-	  }
-	  idx++;
-	}
-      }
-    }
-    return uint4Objects;
-  }
-
-  void uncompressFromUint4(uint8_t *uint4Objects) {
-    size_t idx = 0;
-    size_t uint4StreamSize = streamSize / 2;
-    while (idx < streamSize) {
-      for (size_t lidx = 0; lidx < numOfAlignedSubvectors; lidx++) {
-	for (size_t bidx = 0; bidx < NGTQ_SIMD_BLOCK_SIZE; bidx++) {
-	  if (idx / 2 > uint4StreamSize) {
-	    std::stringstream msg;
-	    msg << "Quantizer::uncompressFromUint4: Fatal inner error! " << (idx / 2) << ":" << uint4StreamSize;
-	    NGTThrowException(msg);
-	  }
-	  if (idx % 2 == 0) {
-	    stream[idx] = uint4Objects[idx / 2] & 0x0f;
-	  } else {
-	    stream[idx] = uint4Objects[idx / 2] >> 4;
-	  }
-	  idx++;
-	}
-      }
-    }
-  }
-
-#ifdef NGTQ_QBG
-  void restoreToInvertedIndex(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects) {
-#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
-    std::cerr << "Not implemented." << std::endl;
-    abort();
+#pragma omp parallel for
+    for (size_t idx = 0; idx < objects.size(); idx++) {
+#ifdef NGTQ_VECTOR_OBJECT
+      auto *object = globalCodebookIndex.allocateObject(objects[idx].first);
+      globalCodebookIndex.deleteObject(object);
 #else
-    invertedIndexObjects.resize(numOfAlignedObjects);
-    for (size_t oidx = 0; oidx < numOfAlignedObjects; oidx++) {
-      for (size_t lidx = 0; lidx < numOfAlignedSubvectors; lidx++) {
-	invertedIndexObjects[oidx].localID[lidx] = getQuantizedObject(oidx, lidx) + 1;
+#endif
+      NGT::ObjectDistances result;
+#define QID_WEIGHT	100	
+      {
+#ifdef NGTQ_VECTOR_OBJECT
+	auto *object = globalCodebookIndex.allocateObject(objects[idx].first);
+	NGT::SearchContainer sc(*object);
+#else
+	NGT::SearchContainer sc(*objects[idx].first);
+#endif
+	sc.setResults(&result);
+	sc.setSize(10);
+	sc.radius = FLT_MAX;
+	sc.setEpsilon(0.1);
+	globalCodebookIndex.search(sc);
+#ifdef NGTQ_VECTOR_OBJECT
+	globalCodebookIndex.deleteObject(object);
+#endif
       }
+      ids[idx].id = result[0].id;
+      ids[idx].distance = result[0].distance;
+      ids[idx].identical = true;
     }
-    invertedIndexObjects.resize(numOfObjects);
-#endif
+    return;
   }
-#endif
 
-  uint8_t* getStream() {
-    auto s = stream;
-    stream = 0;
-    return s;
+  static const std::string getInvertedIndexFile() { return "ivt"; } // fixed component names; NOTE(review): presumably all resolved under rootDirectory like getGlobalFile() -- confirm at call sites
+  static const std::string getGlobalFile() { return "global"; } // joined as rootDirectory + "/" + name when the index is created
+  static const std::string getLocalPrefix() { return "local-"; }
+  static const std::string getRotationFile() { return "qr"; }
+  static const std::string getGlobalToInvertedIndexFile() { return "g2i"; }
+  void saveProperty() { // saves only the property member; nothing else is written here
+    property.save(rootDirectory); // rootDirectory is passed as the save location
   }
 
-  size_t getUint4StreamSize(size_t nOfObjects) {
-    numOfObjects = nOfObjects;
-    setStreamSize();
-    return streamSize / 2;
-  }
+  ObjectList	objectList;
 
-  size_t getStreamSize(size_t nOfObjects) {
-    numOfObjects = nOfObjects;
-    setStreamSize();
-    return streamSize;
-  }
+  string	rootDirectory;
+
+  Property	property;
+
+  NGT::Index	globalCodebookIndex;
+
+  size_t	distanceComputationCount;
+
+  size_t	localIDByteSize;
+  NGT::ObjectSpace::ObjectType objectType;
+  size_t	divisionNo;
+
+  std::vector<NGT::Index>	localCodebookIndexes;
+
+  QuantizationCodebook<float>	quantizationCodebook;
+  std::vector<std::vector<uint32_t>>		objectToBlobIndex;
+  Rotation			rotation;
+
+#ifdef NGTQ_OBJECT_IN_MEMORY
+  NGT::ObjectSpace	*refinementObjectSpace;
+#endif
+  NGT::ObjectSpace	*refinementObjectSpaceForObjectList;
+  };
 
-  uint8_t	*stream;
-  size_t	numOfAlignedSubvectors;
-  size_t	alignedBlockSize;
-  size_t	numOfAlignedObjects ;
-  size_t	numOfObjects ;
-  size_t	streamSize;
-};
- 
 class GenerateResidualObject {
 public:
   GenerateResidualObject():globalCodebookIndex(0), objectList(0), quantizationCodebook(0) {}
@@ -2783,6 +3575,8 @@ class QuantizerInstance : public Quantizer {
     generateResidualObject = 0;
     localCodebooks = 0;
     verbose = false;
+    refinementObjectSpace = 0;
+    refinementObjectSpaceForObjectList = 0;
   }
 
   virtual ~QuantizerInstance() { close(); }
@@ -2797,6 +3591,7 @@ class QuantizerInstance : public Quantizer {
 			NGT::Property &localProperty)
 #endif
   {
+
     rootDirectory = index;
     NGT::Index::mkdir(rootDirectory);
     string global = rootDirectory + "/" + getGlobalFile();
@@ -2839,17 +3634,6 @@ class QuantizerInstance : public Quantizer {
     invertedIndex.serialize(of);
 #endif
     string fname = rootDirectory + "/obj";
-    if (property.dataSize == 0) {
-      std::stringstream msg;
-#ifdef NGTQ_QBG
-      msg << "Quantizer: data size of the object is zero. " << property.dataSize << ":" << property.dimension
-	  << ":" << property.dataType << ":" << property.genuineDataType;
-#else
-      msg << "Quantizer: data size of the object is zero. " << property.dataSize << ":" << property.dimension
-	  << ":" << property.dataType;
-#endif
-      NGTThrowException(msg);
-    }
 #ifdef NGTQ_STATIC_OBJECT_FILE
     if (!objectList.create(fname, objectFile)) {
       std::stringstream msg;
@@ -2861,7 +3645,7 @@ class QuantizerInstance : public Quantizer {
     objectList.openMultipleStreams(omp_get_max_threads());
 #endif
 #else
-    objectList.create(fname, property.dataSize);
+    objectList.create(fname, property.getDataSize());
 #endif
 #ifdef NGTQ_QBG
     if (rotation != 0) {
@@ -2887,7 +3671,7 @@ class QuantizerInstance : public Quantizer {
     qCodebook.serialize(ofs);
   }
 
-    void loadQuantizationCodebookAndRotation(const std::vector<std::vector<float>> &qCodebook, const std::vector<float> &rotation) {
+  void loadQuantizationCodebookAndRotation(const std::vector<std::vector<float>> &qCodebook, const std::vector<float> &rotation) {
     QuantizationCodebook<float> qc;
     qc.setPaddedDimension(globalCodebookIndex.getObjectSpace().getPaddedDimension());
     qc = qCodebook;
@@ -2903,12 +3687,12 @@ class QuantizerInstance : public Quantizer {
     saveQuantizationCodebook(qc);
   }
 
-  void open(const string &index, NGT::Property &globalProperty, bool readOnly) {
-    open(index, readOnly);
+  void open(const string &index, NGT::Property &globalProperty, bool readOnly, DataType refinementDataType) { // opens the index, then applies globalProperty to the global codebook index
+    open(index, readOnly, refinementDataType); // the actual opening is delegated to the overload without globalProperty
     globalCodebookIndex.setProperty(globalProperty);
   }
 
-  void open(const string &index, bool readOnly) {
+  void open(const string &index, bool readOnly, DataType refinementDataType) {
     NGT::StdOstreamRedirector redirector(!verbose);
     redirector.begin();
     rootDirectory = index;
@@ -2967,63 +3751,161 @@ class QuantizerInstance : public Quantizer {
     objectList.openMultipleStreams(omp_get_max_threads());
 #endif
 #ifdef NGTQ_OBJECT_IN_MEMORY
-    if (property.objectListOnMemory) {
-      objectListOnMemory.resize(objectList.size());
-      for (size_t id = 1; id < objectList.size(); id++) {
-	std::vector<float> object;
-	objectList.get(id, object, &globalCodebookIndex.getObjectSpace());
-	NGT::Object *ngtObject = globalCodebookIndex.allocateObject(object);
-	objectListOnMemory.put(id, ngtObject);
+    refinementObjectSpace = 0;
+    refinementDataType = refinementDataType == DataTypeAny ? property. refinementDataType : refinementDataType;
+    if (refinementDataType != DataTypeNone) {
+      auto distanceType = property.distanceType == NGT::ObjectSpace::DistanceTypeInnerProduct
+	                ? NGT::ObjectSpace::DistanceTypeDotProduct : property.distanceType;
+      try {
+	switch (refinementDataType) {
+	case DataTypeFloat:
+	  refinementObjectSpace = new NGT::ObjectSpaceRepository<float, double>(objectList.pseudoDimension, 
+										typeid(float),
+										distanceType,
+										property.maxMagnitude);
+	  break;
+	case DataTypeFloat16:
+	  refinementObjectSpace = new NGT::ObjectSpaceRepository<NGT::float16, double>(objectList.pseudoDimension, 
+										       typeid(NGT::float16),
+										       distanceType,
+										       property.maxMagnitude);
+	  break;
+	default:
+	  stringstream msg;
+	  msg << "Invalid refinement data type. " << refinementDataType;
+	  NGTThrowException(msg);
+	  break;
+	}
+	
+	auto &repo = refinementObjectSpace->getRepository();
+	repo.initialize();
+	for (size_t id = 1; id < objectList.size(); id++) {
+	  std::vector<float> object;
+	  objectList.get(id, object, refinementObjectSpace);
+	  auto *o = repo.allocateNormalizedPersistentObject(object);
+	  repo.push_back(dynamic_cast<NGT::PersistentObject*>(o));
+	}
+      } catch(NGT::Exception &err) {
+	stringstream msg;
+	msg << "Fatal inner error. Cannot set up the refinmentObjectSpace. " << err.what();
+	NGTThrowException(msg);
+      }
+    }
+#endif
+    refinementObjectSpaceForObjectList = 0;
+#ifdef NGTQ_QBG
+    if (property.genuineDataType != ObjectFile::DataTypeNone) {
+      auto distanceType = property.distanceType == NGT::ObjectSpace::DistanceTypeInnerProduct
+	                ? NGT::ObjectSpace::DistanceTypeDotProduct : property.distanceType;
+      try {
+	switch (property.genuineDataType) {
+	case DataTypeFloat:
+	  refinementObjectSpaceForObjectList = new NGT::ObjectSpaceRepository<float, double>(objectList.pseudoDimension, 
+										typeid(float),
+										distanceType,
+										property.maxMagnitude);
+	  break;
+	case DataTypeFloat16:
+	  refinementObjectSpaceForObjectList = new NGT::ObjectSpaceRepository<NGT::float16, double>(objectList.pseudoDimension, 
+										       typeid(NGT::float16),
+										       distanceType,
+										       property.maxMagnitude);
+	  break;
+	default:
+	  stringstream msg;
+	  msg << "Invalid refinement data type for the object list. " << property.genuineDataType;
+	  NGTThrowException(msg);
+	  break;
+	}
+      } catch(NGT::Exception &err) {
+	stringstream msg;
+	msg << "Fatal inner error. Cannot set up the refinmentObjectSpac for the object liste. " << err.what();
+	NGTThrowException(msg);
       }
     }
 #endif
     NGT::Property globalProperty;
     globalCodebookIndex.getProperty(globalProperty);
     size_t sizeoftype = 0;
+    switch (property.localClusterDataType) {
+#ifdef NGT_IVI
+    case ClusterDataTypeNQ:
+      {
+	quantizedObjectDistance = new NonQuantizedObjectDistance<float>(*this);
+	generateResidualObject = new GenerateResidualObjectFloat;
+	sizeoftype = sizeof(float);
+	break;
+      }
+    case ClusterDataTypeSQSU8:
+      {
+	if (property.distanceType == NGT::ObjectSpace::DistanceTypeInnerProduct) {
+	  quantizedObjectDistance = new ScalarQuantizedInt8ObjectDistance<NGT::qsint8, NGT::quint8>(*this, property.distanceType);
+	} else {
+	  quantizedObjectDistance = new ScalarQuantizedInt8ObjectDistance<NGT::qsint8, NGT::qsint8>(*this, property.distanceType);
+	}
+	generateResidualObject = new GenerateResidualObjectFloat;
+	sizeoftype = sizeof(float);
+	break;
+      }
+#endif
+    default:
+      {
 #ifdef NGT_HALF_FLOAT
-    if (globalProperty.objectType == NGT::Property::ObjectType::Float ||
-	globalProperty.objectType == NGT::Property::ObjectType::Float16) {
+	if (globalProperty.objectType == NGT::Property::ObjectType::Float ||
+	    globalProperty.objectType == NGT::Property::ObjectType::Float16) {
 #else
-    if (globalProperty.objectType == NGT::Property::ObjectType::Float) {
+	if (globalProperty.objectType == NGT::Property::ObjectType::Float) {
 #endif
-      if (property.localIDByteSize == 4) {
-	quantizedObjectDistance = new QuantizedObjectDistanceFloat<uint32_t>;
-      } else if (property.localIDByteSize == 2) {
-	quantizedObjectDistance = new QuantizedObjectDistanceFloat<uint16_t>;
+	  if (property.localIDByteSize == 4) {
+	    quantizedObjectDistance = new QuantizedObjectDistanceFloat<uint32_t>;
+	  } else if (property.localIDByteSize == 2) {
+	    quantizedObjectDistance = new QuantizedObjectDistanceFloat<uint16_t>;
 #ifdef NGTQ_QBG
-      } else if (property.localIDByteSize == 1) {
-	quantizedObjectDistance = new QuantizedObjectDistanceFloat<uint8_t>;
+	  } else if (property.localIDByteSize == 1) {
+	    quantizedObjectDistance = new QuantizedObjectDistanceFloat<uint8_t>;
 #endif
-      } else {
-	std::cerr << "Invalid localIDByteSize : " << property.localIDByteSize << std::endl;
-	abort();
-      }
-      generateResidualObject = new GenerateResidualObjectFloat;
-      sizeoftype = sizeof(float);
-    } else if (globalProperty.objectType == NGT::Property::ObjectType::Uint8) {
-      if (property.localIDByteSize == 4) {
-	quantizedObjectDistance = new QuantizedObjectDistanceUint8<uint32_t>;
-      } else if (property.localIDByteSize == 2) {
-	quantizedObjectDistance = new QuantizedObjectDistanceUint8<uint16_t>;
+	  } else {
+	    std::cerr << "Invalid localIDByteSize : " << property.localIDByteSize << std::endl;
+	    abort();
+	  }
+	  generateResidualObject = new GenerateResidualObjectFloat;
+	  sizeoftype = sizeof(float);
+	} else if (globalProperty.objectType == NGT::Property::ObjectType::Uint8) {
+	  if (property.localIDByteSize == 4) {
+	    quantizedObjectDistance = new QuantizedObjectDistanceUint8<uint32_t>;
+	  } else if (property.localIDByteSize == 2) {
+	    quantizedObjectDistance = new QuantizedObjectDistanceUint8<uint16_t>;
 #ifdef NGTQ_QBG
-      } else if (property.localIDByteSize == 1) {
-	quantizedObjectDistance = new QuantizedObjectDistanceFloat<uint8_t>;
+	  } else if (property.localIDByteSize == 1) {
+	    quantizedObjectDistance = new QuantizedObjectDistanceFloat<uint8_t>;
 #endif
-      } else {
-	std::cerr << "Inconsistent localIDByteSize and ObjectType. " << property.localIDByteSize << ":" << globalProperty.objectType << std::endl;
-	abort();
-      }
+	  } else {
+	    std::cerr << "Inconsistent localIDByteSize and ObjectType. " << property.localIDByteSize << ":" << globalProperty.objectType << std::endl;
+	    abort();
+	  }
 #ifdef NGTQ_VECTOR_OBJECT
-      generateResidualObject = new GenerateResidualObjectFloat;
-      sizeoftype = sizeof(float);
+	  generateResidualObject = new GenerateResidualObjectFloat;
+	  sizeoftype = sizeof(float);
 #else
-      generateResidualObject = new GenerateResidualObjectUint8;
-      sizeoftype = sizeof(uint8_t);
+	  generateResidualObject = new GenerateResidualObjectUint8;
+	  sizeoftype = sizeof(uint8_t);
 #endif
-    } else {
-      cerr << "NGTQ::open: Fatal Inner Error: invalid object type. " << globalProperty.objectType << endl;
-      cerr << "   check NGT version consistency between the caller and the library." << endl;
-      abort();
+	} else {
+	  cerr << "NGTQ::open: Fatal Inner Error: invalid object type. " << globalProperty.objectType << endl;
+	  cerr << "   check NGT version consistency between the caller and the library." << endl;
+	  abort();
+	}
+	break;
+      }
+    }
+    if (quantizedObjectDistance == 0) {
+      NGTThrowException("Inner fatal error! quantizeObjectDistance is invalid.");
+    }
+    if (generateResidualObject == 0) {
+      NGTThrowException("Inner fatal error! generateResidualObject is invalid.");
+    }
+    if (sizeoftype == 0) {
+      NGTThrowException("Inner fatal error! sizeoftype is invalid.");
     }
     assert(quantizedObjectDistance != 0);
 #ifdef NGTQ_QBG
@@ -3113,10 +3995,18 @@ class QuantizerInstance : public Quantizer {
   void close() {
     objectList.close();
 #ifdef NGTQ_OBJECT_IN_MEMORY
-    for (size_t i = 1; i < objectListOnMemory.size(); i++) {
-      globalCodebookIndex.deleteObject(objectListOnMemory.get(i));
+    if (refinementObjectSpace != 0) {
+#ifndef NGT_SHARED_MEMORY_ALLOCATOR
+      refinementObjectSpace->deleteAll();
+#endif
+      delete refinementObjectSpace;
+      refinementObjectSpace = 0;
     }
 #endif
+    if (refinementObjectSpaceForObjectList != 0) {
+      delete refinementObjectSpaceForObjectList;
+      refinementObjectSpaceForObjectList = 0;
+    }
     closeCodebooks();
     if (quantizedObjectDistance != 0) {
       delete quantizedObjectDistance;
@@ -3304,7 +4194,7 @@ class QuantizerInstance : public Quantizer {
     float lr = property.localRange;
     size_t localCentroidLimit = property.localCentroidLimit;
     if (property.localCodebookState) {
-      lr = FLT_MAX;
+      lr = FLT_MAX;	
       localCentroidLimit = 0;
     }
     vector<NGT::Index::InsertionResult> lids;
@@ -3342,7 +4232,7 @@ class QuantizerInstance : public Quantizer {
 	localCentroidLimit *= property.localClusteringSampleCoefficient;
       }
       if (property.localCodebookState) {
-	lr = FLT_MAX;
+	lr = FLT_MAX;	
 	localCentroidLimit = 0;
       } else {
 	if (property.localCentroidCreationMode == CentroidCreationModeDynamicKmeans) {
@@ -3390,7 +4280,7 @@ class QuantizerInstance : public Quantizer {
 	localCentroidLimit *= property.localClusteringSampleCoefficient;
       }
       if (property.localCodebookState) {
-	lr = FLT_MAX;
+	lr = FLT_MAX;	
 	localCentroidLimit = 0;
       } else {
 	if (property.localCentroidCreationMode == CentroidCreationModeDynamicKmeans) {
@@ -3605,14 +4495,15 @@ class QuantizerInstance : public Quantizer {
 
 #ifdef NGTQ_VECTOR_OBJECT
   void getBlobIDFromObjectToBlobIndex(const vector<pair<std::vector<float>, size_t>> &objects,
-				      vector<NGT::Index::InsertionResult> &ids)
+				      vector<NGT::Index::InsertionResult> &ids,
+				      vector<uint32_t> &ids2oidx)
 #else
   void getBlobIDFromObjectToBlobIndex(const vector<pair<NGT::Object*, size_t>> &objects,
-				      vector<NGT::Index::InsertionResult> &ids)
+				      vector<NGT::Index::InsertionResult> &ids,
+				      vector<uint32_t> &ids2oidx)
 #endif
   {
     ids.clear();
-    ids.resize(objects.size());
 #ifdef GET_BLOB_EVAL
     size_t identicalObjectCount = 0;
 #endif
@@ -3624,25 +4515,26 @@ class QuantizerInstance : public Quantizer {
 	    << ":" << objects.size();
 	NGTThrowException(msg);
       }
-      ids[idx].id = objectToBlobIndex[objects[idx].second - 1] + 1;
-      ids[idx].distance = 0.0;
-      ids[idx].identical = true;
+      for (auto bid : objectToBlobIndex[objects[idx].second - 1]) {
+	ids.emplace_back(NGT::Index::InsertionResult(bid + 1, true, 0.0));
+	ids2oidx.emplace_back(static_cast<uint32_t>(idx));
 #ifdef GET_BLOB_EVAL
-      {
-	NGT::ObjectDistances result;
-	NGT::SearchContainer sc(*objects[idx].first);
-	sc.setResults(&result);
-	sc.setSize(50);
-	sc.radius = FLT_MAX;
-	sc.setEpsilon(0.1);
-	globalCodebookIndex.search(sc);
-	//std::cerr << "insert:Eval: ";
-	if (result[0].id == ids[idx].id) {
-	  identicalObjectCount++;
-	} else {
+	{ // GET_BLOB_EVAL only: check whether a graph search finds the same blob as the table lookup
+	  NGT::ObjectDistances result;
+	  NGT::SearchContainer sc(*objects[idx].first);
+	  sc.setResults(&result);
+	  sc.setSize(50);
+	  sc.radius = FLT_MAX;
+	  sc.setEpsilon(0.1);
+	  globalCodebookIndex.search(sc);
+	  // ids grows by one entry per blob, so ids[idx] is not the entry just added; compare with ids.back().
+	  if (result[0].id == ids.back().id) {
+	    identicalObjectCount++;
+	  } else {
+	  }
 	}
-      }
 #endif
+      }
     }
 #ifdef GET_BLOB_EVAL
     std::cerr << identicalObjectCount << "/" << objects.size() << std::endl;
@@ -3692,7 +4584,6 @@ class QuantizerInstance : public Quantizer {
 	stringstream msg;
 	msg << "buildGlobalCodebookWithQIDIndex: fatal inner error. " << err.what() << " : ID=" << id << " size=" << invertedIndex.size();
 	NGTThrowException(msg);
-	NGTThrowException(msg);
       }
     }
     std::cerr << "creating the index..." << std::endl;
@@ -3842,7 +4733,8 @@ class QuantizerInstance : public Quantizer {
     for (size_t i = 0; i < localCodebookNo; i++) {
       lcodebook.push_back(&static_cast<NGT::GraphAndTreeIndex &>(localCodebookIndexes[i].getIndex()));
     }
-    vector<NGT::Index::InsertionResult> ids;
+    std::vector<NGT::Index::InsertionResult> ids;	
+    std::vector<uint32_t> ids2oidx;
     if (property.centroidCreationMode == CentroidCreationModeStaticLayer ||
 	property.centroidCreationMode == CentroidCreationModeStatic) {
       if (objectToBlobIndex.empty()) {
@@ -3855,7 +4747,7 @@ class QuantizerInstance : public Quantizer {
 	invertedIndex.reserve(invertedIndex.size() + objects.size());
 #endif
       } else {
-	getBlobIDFromObjectToBlobIndex(objects, ids);
+	getBlobIDFromObjectToBlobIndex(objects, ids, ids2oidx);
       }
     } else {
       std::stringstream msg;
@@ -3863,8 +4755,14 @@ class QuantizerInstance : public Quantizer {
       NGTThrowException(msg);
     }
     vector<LocalDatam> localData;
-    for (size_t i = 0; i < ids.size(); i++) {
-      setGlobalCodeToInvertedEntry(ids[i], objects[i], localData);
+    if (ids2oidx.empty()) {
+      for (size_t i = 0; i < ids.size(); i++) {
+	setGlobalCodeToInvertedEntry(ids[i], objects[i], localData);
+      }
+    } else {
+      for (size_t i = 0; i < ids.size(); i++) {
+	setGlobalCodeToInvertedEntry(ids[i], objects[ids2oidx[i]], localData);
+      }
     }
     float subspaceObjects[localData.size()][globalCodebookIndex.getObjectSpace().getPaddedDimension()];
     bool error = false;
@@ -3872,6 +4770,10 @@ class QuantizerInstance : public Quantizer {
 #pragma omp parallel for
     for (size_t i = 0; i < localData.size(); i++) {
       if (error) continue;
+      size_t objidx = i;
+      if (!ids2oidx.empty()) {
+	objidx = ids2oidx[i];
+      }
       IIEntry &invertedIndexEntry = *invertedIndex.at(localData[i].iiIdx);
 #ifdef NGTQ_SHARED_INVERTED_INDEX
 #ifdef NGTQ_QBG
@@ -3884,25 +4786,24 @@ class QuantizerInstance : public Quantizer {
 #endif
 #else
 #ifdef NGTQ_QBG
-
 #ifdef NGTQG_ROTATED_GLOBAL_CODEBOOKS
       if (!rotation.empty()) {
 #ifdef NGTQ_VECTOR_OBJECT
-	rotation.mul(objects[i].first.data());
+	rotation.mul(objects[objidx].first.data());
 #else
-	rotation.mul(static_cast<float*>(objects[i].first->getPointer()));
+	rotation.mul(static_cast<float*>(objects[objidx].first->getPointer()));
 #endif
       }
 #endif
       try {
 #ifdef NGTQ_VECTOR_OBJECT
-        (*generateResidualObject)(objects[i].first, // object
+        (*generateResidualObject)(objects[objidx].first, // object
 				  invertedIndexEntry.subspaceID,
-				  subspaceObjects[i]); // subspace objects
+				  subspaceObjects[objidx]); // subspace objects
 #else
-        (*generateResidualObject)(*objects[i].first, // object
+        (*generateResidualObject)(*objects[objidx].first, // object
 				  invertedIndexEntry.subspaceID,
-				  subspaceObjects[i]); // subspace objects
+				  subspaceObjects[objidx]); // subspace objects
 #endif
       } catch(NGT::Exception &err) {
 	if (errorMessage.empty()) {
@@ -3912,7 +4813,7 @@ class QuantizerInstance : public Quantizer {
 	continue;
       }
 #ifndef NGTQG_ROTATED_GLOBAL_CODEBOOKS
-      rotation.mul(subspaceObjects[i]);
+      rotation.mul(subspaceObjects[objidx]);
 #endif
 #else
       (*generateResidualObject)(invertedIndexEntry[localData[i].iiLocalIdx].id,
@@ -3942,11 +4843,11 @@ class QuantizerInstance : public Quantizer {
 	if (property.localCentroidCreationMode == CentroidCreationModeDynamicKmeans) {
 	  buildMultipleLocalCodebooks(localCodebookIndexes.data(), localCodebookNo, property.localCentroidLimit);
 	  (*generateResidualObject).set(localCodebookIndexes.data(), localCodebookNo);
-	  property.localCodebookState = true;
-	  localCodebookFull = false;
+	  property.localCodebookState = true;	
+	  localCodebookFull = false;		
 	  replaceInvertedIndexEntry(localCodebookNo);
 	} else {
-	  property.localCodebookState = true;
+	  property.localCodebookState = true;	
 	  localCodebookFull = false;
 	}
       }
@@ -4049,9 +4950,8 @@ class QuantizerInstance : public Quantizer {
   }
 #endif
 
-  void setupInvertedIndex(std::vector<std::vector<float>> &qCodebook,
-			  std::vector<uint32_t> &codebookIndex,
-			  std::vector<uint32_t> &objectIndex) {
+  void setupInvertedIndex(std::vector<uint32_t> &codebookIndex,
+			  std::vector<std::vector<uint32_t>> &objectIndex) {
 #if !defined(NGTQ_QBG)
     std::cerr << "setupInvertedIndex: Not implemented." << std::endl;
     abort();
@@ -4088,7 +4988,9 @@ class QuantizerInstance : public Quantizer {
     objectIndex.clear();
     std::vector<uint32_t> invertedIndexCount(codebookIndex.size());
     for (size_t idx = 0; idx < objectToBlobIndex.size(); idx++) {
-      invertedIndexCount[objectToBlobIndex[idx]]++;
+      for (auto bid : objectToBlobIndex[idx]) {
+	invertedIndexCount[bid]++;
+      }
     }
     for (size_t idx = 0; idx < codebookIndex.size(); idx++) {
       auto gid = idx + 1;
@@ -4155,52 +5057,10 @@ class QuantizerInstance : public Quantizer {
     gp.set(globalProperty);
     lp.set(localProperty);
 
-    gp.edgeSizeForSearch = 40;
-    lp.edgeSizeForSearch = 40;
+    gp.edgeSizeForSearch = 40;	
+    lp.edgeSizeForSearch = 40;	
 
     lp.objectType = NGT::Index::Property::ObjectType::Float;
-#ifdef NGTQ_QBG
-    if (property.genuineDimension > property.dimension) {
-      stringstream msg;
-      msg << "NGTQ::Quantizer::create: dimension must be larger than genuineDimension. " << property.dimension << ":" << property.genuineDimension << std::endl;
-      NGTThrowException(msg);
-    }
-#endif
-    gp.dimension = property.dimension;
-    if (gp.dimension == 0) {
-      stringstream msg;
-      msg << "NGTQ::Quantizer::create: specified dimension is zero!";
-      NGTThrowException(msg);
-    }
-    if (property.localDivisionNo == 0) {
-      NGTThrowException("NGTQ::Quantizer::create: # of subvectors is zero");
-    }
-    if (property.localDivisionNo != 1 && property.dimension % property.localDivisionNo != 0) {
-      stringstream msg;
-      msg << "NGTQ::Quantizer::create: The combination of dimension and localDivisionNo is invalid. "
-	  << "the localDivisionNo must be a divisor of the dimension. "
-	  << property.dimension << ":" << property.localDivisionNo;
-      NGTThrowException(msg);
-    }
-    lp.dimension = property.dimension / property.localDivisionNo;
-
-    switch (property.dataType) {
-    case DataTypeFloat:
-      gp.objectType = NGT::Index::Property::ObjectType::Float;
-      break;
-    case DataTypeFloat16:
-      gp.objectType = NGT::Index::Property::ObjectType::Float16;
-      break;
-    case DataTypeUint8:
-      gp.objectType = NGT::Index::Property::ObjectType::Uint8;
-      break;
-    default:
-      {
-	stringstream msg;
-	msg << "NGTQ::Quantizer::create: Inner error! Invalid data type.";
-	NGTThrowException(msg);
-      }
-    }
 
     switch (property.distanceType) {
     case DistanceType::DistanceTypeL1:
@@ -4245,10 +5105,14 @@ class QuantizerInstance : public Quantizer {
       gp.distanceType = NGT::Index::Property::DistanceType::DistanceTypeNormalizedL2;
       lp.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL2;
       break;
-#ifdef NGT_INNER_PRODUCT
+#ifdef NGTQ_QBG
     case DistanceType::DistanceTypeInnerProduct:
       gp.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL2;
       lp.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL2;
+      if (property.dimension == property.genuineDimension) {
+	property.dimension++;
+      }
+      property.genuineDimension++;
       break;
 #endif
     default:
@@ -4259,6 +5123,31 @@ class QuantizerInstance : public Quantizer {
       }
     }
 
+#ifdef NGTQ_QBG
+    if (property.genuineDimension > property.dimension) {
+      stringstream msg;
+      msg << "NGTQ::Quantizer::create: dimension must be larger than genuineDimension. " << property.dimension << ":" << property.genuineDimension << std::endl;
+      NGTThrowException(msg);
+    }
+#endif
+    gp.dimension = property.dimension;
+    if (gp.dimension == 0) {
+      stringstream msg;
+      msg << "NGTQ::Quantizer::create: specified dimension is zero!";
+      NGTThrowException(msg);
+    }
+    if (property.localDivisionNo == 0) {
+      NGTThrowException("NGTQ::Quantizer::create: # of subvectors is zero");
+    }
+    if (property.localDivisionNo != 1 && property.dimension % property.localDivisionNo != 0) {
+      stringstream msg;
+      msg << "NGTQ::Quantizer::create: The combination of dimension and localDivisionNo is invalid. "
+	  << "the localDivisionNo must be a divisor of the dimension. "
+	  << property.dimension << ":" << property.localDivisionNo;
+      NGTThrowException(msg);
+    }
+    lp.dimension = property.dimension / property.localDivisionNo;
+
 #ifdef NGTQ_QBG
     createEmptyIndex(index, gp, lp, rotation, objectFile);
 #else
@@ -4908,8 +5797,8 @@ class Quantization {
  class Index {
  public:
    Index():quantizer(0) {}
-   Index(const string& index, bool rdOnly = false):quantizer(0) {
-     open(index, rdOnly);
+   Index(const string& index, bool rdOnly = false, DataType refinementDataType = DataTypeAny):quantizer(0) {  // opens the index at construction
+     open(index, rdOnly, refinementDataType);  // refinementDataType is passed through to open() unchanged
    }
    ~Index() { close(); }
 
@@ -4930,17 +5819,10 @@ class Quantization {
      property.setup(property);
      NGTQ::Quantizer *quantizer = NGTQ::Quantization::generate(property);
      try {
-#ifdef NGTQ_QBG
-       if (property.dimension == 0) {
-	 property.dimension = property.genuineDimension;
-       }
-       if (property.dimension % 4 != 0) {
-	 property.dimension = ((property.dimension - 1) / 4 + 1) * 4;
-       }
        quantizer->property = property;
+#ifdef NGTQ_QBG
        quantizer->create(index, globalProperty, localProperty, rotation, objectFile);
 #else
-       quantizer->property = property;
        quantizer->create(index, globalProperty, localProperty);
 #endif
        if (property.dimension == 0) {
@@ -4970,8 +5852,8 @@ class Quantization {
 #endif
 
 #ifndef NGTQ_QBG
-  static void rebuild(const string &indexName,
-		      const string &rebuiltIndexName
+  static void rebuild(const string &indexName,		
+		      const string &rebuiltIndexName	
 		     ) {
 
     const string srcObjectList = indexName + "/obj";
@@ -4998,12 +5880,12 @@ class Quantization {
   }
 #endif
 
-  void open(const string &index, bool readOnly = false) {
+  void open(const string &index, bool readOnly = false, DataType refinementDataType = DataTypeAny) {
      close();
      NGT::Property globalProperty;
      globalProperty.clear();
      globalProperty.edgeSizeForSearch = 40;
-     quantizer = getQuantizer(index, globalProperty, readOnly);
+     quantizer = getQuantizer(index, globalProperty, readOnly, refinementDataType);
      if ((quantizer->property.quantizerType == NGTQ::QuantizerTypeQG) && readOnly) {
        quantizer->closeCodebooks();
      }
@@ -5036,19 +5918,17 @@ class Quantization {
      getQuantizer().createIndex(beginID, endID);
    }
 
-   void createIndex(std::vector<std::vector<float>> &quantizationCodebook,
-		    std::vector<uint32_t> &codebookIndex,
-		    std::vector<uint32_t> &objectIndex,
+   void createIndex(std::vector<uint32_t> &codebookIndex,
+		    std::vector<std::vector<uint32_t>> &objectIndex,
 		    size_t beginID = 1, size_t endID = 0) {
-     setupInvertedIndex(quantizationCodebook, codebookIndex, objectIndex);
+     setupInvertedIndex(codebookIndex, objectIndex);
      createIndex(beginID, endID);
    }
 #endif
 
-   void setupInvertedIndex(std::vector<std::vector<float>> &quantizationCodebook,
-				 std::vector<uint32_t> &codebookIndex,
-				 std::vector<uint32_t> &objectIndex) {
-     getQuantizer().setupInvertedIndex(quantizationCodebook, codebookIndex, objectIndex);
+   void setupInvertedIndex(std::vector<uint32_t> &codebookIndex,  // thin wrapper around the quantizer
+			   std::vector<std::vector<uint32_t>> &objectIndex) {
+     getQuantizer().setupInvertedIndex(codebookIndex, objectIndex);  // both arguments are forwarded by reference
    }
 
 
@@ -5119,8 +5999,35 @@ class Quantization {
      return object;
    }
 
+   // Derives scale/offset from an observed [min, max] value range and stores them via setQuantization().
+   void setQuantizationFromMaxMin(float max, float min) {
+     const bool zeroOffset = getQuantizer().property.localClusterDataType == ClusterDataTypeSQSU8;
+     // SQSU8: offset is zero and scale is the larger absolute bound. Otherwise: offset is min, scale is max - min.
+     float offset = min;
+     float scale = max - offset;
+     if (zeroOffset) {
+       offset = 0.0;
+       scale = std::max(fabs(max), fabs(min));
+     }
+     setQuantization(scale, offset);
+   }
+
+   void setQuantization(float scale, float offset) {  // records both values in the quantizer's property object
+     getQuantizer().property.scalarQuantizationScale = scale;
+     getQuantizer().property.scalarQuantizationOffset = offset;  // only assigned here; this function does not call saveProperty()
+   }
+
+   void setMaxMagnitude(float maxMagnitude) {  // records maxMagnitude in the quantizer's property object
+     getQuantizer().property.maxMagnitude = maxMagnitude;
+   }
+
+   void saveProperty() {  // delegates to the quantizer's saveProperty()
+     getQuantizer().saveProperty();
+   }
+
  protected:
-   static NGTQ::Quantizer *getQuantizer(const string &index, NGT::Property &globalProperty, bool readOnly) {
+   static NGTQ::Quantizer *getQuantizer(const string &index, NGT::Property &globalProperty, bool readOnly,
+					DataType refinementDataType = DataTypeAny) {
      NGTQ::Property property;
      try {
        property.load(index);
@@ -5134,7 +6041,8 @@ class Quantization {
        NGTThrowException("NGTQ::Index: Cannot get quantizer.");
      }
      try {
-       quantizer->open(index, globalProperty, property.quantizerType == NGTQ::QuantizerTypeQBG ? readOnly : false);
+       quantizer->open(index, globalProperty, property.quantizerType == NGTQ::QuantizerTypeQBG ? readOnly : false,
+		       refinementDataType);
      } catch(NGT::Exception &err) {
        delete quantizer;
        throw err;
@@ -5147,4 +6055,41 @@ class Quantization {
    bool verbose;
  };
 
+  template<typename TYPE>
+  void NGTQ::ObjectProcessingStream<TYPE>::arrange(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects) {  // the body is compiled only when NGTQ_QBG is defined
+#ifdef NGTQ_QBG
+    if (&quantizer == 0) {  // NOTE(review): quantizer is accessed with '.', so its address is not expected to be null; this guard likely never fires — confirm
+      NGTThrowException("quantizer is invalid.");
+    }
+    for (size_t oidx = 0; oidx < invertedIndexObjects.size(); oidx++) {  // one pass per entry of the inverted index entry
+      std::vector<float> object;
+      quantizer.objectList.get(invertedIndexObjects[oidx].id, object);  // fetches the object with this entry's id from the object list
+#ifdef NGTQG_ROTATED_GLOBAL_CODEBOOKS
+      quantizer.rotation.mul(object);
+#endif
+      arrangeObject(oidx, object.data());  // hands the float data to arrangeObject at position oidx
+    }
+#endif
+  }
+
+  inline void NGTQ::ScalarQuantizedInt8ObjectProcessingStream::arrange(NGTQ::InvertedIndexEntry<uint16_t> &invertedIndexObjects) {  // the body is compiled only when NGTQ_QBG is defined
+#ifdef NGTQ_QBG
+    if (&quantizer == 0) {  // NOTE(review): quantizer is accessed with '.', so its address is not expected to be null; this guard likely never fires — confirm
+      NGTThrowException("quantizer is invalid.");
+    }
+    float scale = quantizer.property.scalarQuantizationScale;  // scale and offset are read once, before the loop
+    float offset = quantizer.property.scalarQuantizationOffset;
+    auto shift = quantizer.property.distanceType == DistanceType::DistanceTypeInnerProduct &&
+                 *dataTypeInfo == typeid(NGT::qsint8);  // shift is true only for inner product combined with qsint8 data
+    for (size_t oidx = 0; oidx < invertedIndexObjects.size(); oidx++) {
+      std::vector<float> object;
+      quantizer.objectList.get(invertedIndexObjects[oidx].id, object);  // fetches the object with this entry's id from the object list
+#ifdef NGTQG_ROTATED_GLOBAL_CODEBOOKS
+      quantizer.rotation.mul(object);
+#endif
+      arrangeObject(oidx, object, scale, offset, shift);  // hands the object and the quantization parameters to arrangeObject
+    }
+#endif
+  }
+
 } // namespace NGTQ
diff --git a/lib/NGT/ObjectRepository.h b/lib/NGT/ObjectRepository.h
index 39e71af..725cccf 100644
--- a/lib/NGT/ObjectRepository.h
+++ b/lib/NGT/ObjectRepository.h
@@ -126,6 +126,7 @@ namespace NGT {
       if (dataSize > 0) {
 	reserve(size() + dataSize);
       }
+      size_t dim = innerProduct ? dimension - 1 : dimension;
       std::string line;
       size_t lineNo = 0;
       while (getline(is, line)) {
@@ -136,6 +137,7 @@ namespace NGT {
 	  break;
 	}
 	std::vector<double> object;
+	object.reserve(dim);
 	try {
 	  extractObjectFromText(line, "\t, ", object);
 	  PersistentObject *obj = 0;
@@ -179,7 +181,7 @@ namespace NGT {
 	  try {
 	    obj = allocateNormalizedPersistentObject(object);
 	  } catch (Exception &err) {
-	    std::cerr << err.what() << " continue..." << std::endl;
+	    std::cerr << err.what() << " " << typeid(T).name()  << ". continue..." << std::endl;
 	    obj = allocatePersistentObject(object);
 	  }
 	  push_back(obj);
@@ -250,6 +252,12 @@ namespace NGT {
 	  obj[i] = static_cast<float16>(o[i]);
 	}
 #endif
+      } else if (type == typeid(qsint8)) {
+	uint8_t *obj = static_cast<uint8_t*>(object);
+	for (size_t i = 0; i < size; i++) {
+	  auto i8 = static_cast<int8_t>(o[i]);
+	  obj[i] = *reinterpret_cast<uint8_t*>(&i8);
+	}
 #ifdef NGT_BFLOAT
       } else if (type == typeid(bfloat16)) {
 	bfloat16 *obj = static_cast<bfloat16*>(object);
@@ -273,16 +281,25 @@ namespace NGT {
     template <typename T>
       Object *allocateObject(T *o, size_t size) {
       size_t osize = paddedByteSize;
+      if (size == 0) {
+	  NGTThrowException("ObjectSpace::allocateObject: Fatal error! The specified dimension is zero.");
+      }
       if (sparse) {
 	size_t vsize = size * (type == typeid(float) ? 4 : 1);
 	osize = osize < vsize ? vsize : osize;
+      } else if (innerProduct) {
+	if (dimension != size && (dimension - 1) != size) {
+	  std::stringstream msg;
+	  msg << "ObjectSpace::allocateObject: Fatal error! The specified dimension is invalid. "
+	      << "The indexed objects=" << dimension << " The specified object=" << size
+	      << " for Inner product!";
+	  NGTThrowException(msg);
+	}
       } else {
-	if (size != 0 && 
-	    ((innerProduct && dimension != size && (dimension - 1) != size) ||
-	     (!innerProduct && dimension != size))) {
+	if (dimension != size) {
 	  std::stringstream msg;
-	  msg << "ObjectSpace::allocateObject: Fatal error! The specified dimension is invalid. The indexed objects="
-	      << dimension << " The specified object=" << size;
+	  msg << "ObjectSpace::allocateObject: Fatal error! The specified dimension is invalid. "
+	      << "The indexed objects=" << dimension << " The specified object=" << size;
 	  NGTThrowException(msg);
 	}
       }
@@ -447,6 +464,7 @@ namespace NGT {
     void setInnerProduct() { innerProduct = true; }
     size_t getByteSize() { return byteSize; }
     size_t insert(PersistentObject *obj) { return Parent::insert(obj); }
+    size_t insert(size_t id, PersistentObject *obj) { return Parent::insert(id, obj); }  // overload taking an explicit id; forwards to Parent::insert
     const size_t dimension;
     const std::type_info &type;
    protected:
diff --git a/lib/NGT/ObjectSpace.cpp b/lib/NGT/ObjectSpace.cpp
index dbbd7bd..35c58be 100644
--- a/lib/NGT/ObjectSpace.cpp
+++ b/lib/NGT/ObjectSpace.cpp
@@ -22,7 +22,9 @@
 NGT::Distance NGT::ObjectSpace::compareWithL1(NGT::Object &o1, NGT::Object &o2) {
   auto dim = getPaddedDimension();
   NGT::Distance d;
-  if (getObjectType() == typeid(uint8_t)) {
+  if (getObjectType() == typeid(uint8_t) ||
+      getObjectType() == typeid(quint8) ||
+      getObjectType() == typeid(qsint8)) {
     d = PrimitiveComparator::compareL1(reinterpret_cast<uint8_t*>(o1.getPointer()), 
 				       reinterpret_cast<uint8_t*>(o2.getPointer()), dim);
 #ifdef NGT_HALF_FLOAT
@@ -35,7 +37,8 @@ NGT::Distance NGT::ObjectSpace::compareWithL1(NGT::Object &o1, NGT::Object &o2)
 				       reinterpret_cast<float*>(o2.getPointer()), dim);
   } else {
     std::stringstream msg;
-    msg << "ObjectSpace::compareWithL1: Fatal Inner Error! Unexpected object type.";
+    msg << "ObjectSpace::compareWithL1: Fatal Inner Error! Unexpected object type. "
+	<< getObjectType().name();
     NGTThrowException(msg);
   }
   return d;
diff --git a/lib/NGT/ObjectSpace.h b/lib/NGT/ObjectSpace.h
index a2f7b98..ce3b916 100644
--- a/lib/NGT/ObjectSpace.h
+++ b/lib/NGT/ObjectSpace.h
@@ -175,9 +175,8 @@ namespace NGT {
       DistanceTypeJaccard		= 7,
       DistanceTypeSparseJaccard		= 8,
       DistanceTypeNormalizedL2		= 9,
-#ifdef NGT_INNER_PRODUCT
       DistanceTypeInnerProduct		= 10,
-#endif
+      DistanceTypeDotProduct		= 11,
       DistanceTypePoincare		= 100,  // added by Nyapicom
       DistanceTypeLorentz		= 101  // added by Nyapicom
     };
@@ -190,6 +189,8 @@ namespace NGT {
       ,
       Float16		= 3
 #endif
+      ,
+      Qsuint8		= 7
 #ifdef NGT_BFLOAT
       ,
       Bfloat16		= 5
@@ -198,10 +199,15 @@ namespace NGT {
 
 
     typedef std::priority_queue<ObjectDistance, std::vector<ObjectDistance>, std::less<ObjectDistance> > ResultSet;
-  ObjectSpace(size_t d):dimension(d), distanceType(DistanceTypeNone), comparator(0), normalization(false),
-                        prefetchOffset(-1), prefetchSize(-1)
+  ObjectSpace(size_t d):dimension(d), distanceType(DistanceTypeNone), comparator(0), comparatorForSearch(0),  // both comparators start null
+                        normalization(false),
+                        prefetchOffset(-1), prefetchSize(-1), quantizationScale(0.0), quantizationOffset(0.0),  // scale 0.0 makes quantizationIsEnabled() return false
+		        magnitude(-1)
     {}
-    virtual ~ObjectSpace() { if (comparator != 0) { delete comparator; } }
+    virtual ~ObjectSpace() {  // deletes whichever comparators are set
+      if (comparator != 0) { delete comparator; } 
+      if (comparatorForSearch != 0) { delete comparatorForSearch; } // a null search comparator is skipped
+    }
     
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
     virtual void open(const std::string &f, size_t shareMemorySize) = 0;
@@ -214,9 +220,17 @@ namespace NGT {
     virtual size_t insert(PersistentObject *obj) = 0;
 #else
     virtual size_t insert(Object *obj) = 0;
+    virtual void deleteAll() = 0;
 #endif
 
     Comparator &getComparator() { return *comparator; }
+    // Returns the search-specific comparator when one is set,
+    // and the general comparator otherwise.
+    Comparator &getComparatorForSearch() {
+      Comparator *selected = comparatorForSearch;
+      if (selected == 0) { selected = comparator; }
+      return *selected;
+    }
 
     virtual void serialize(const std::string &of) = 0;
     virtual void deserialize(const std::string &ifile) = 0;
@@ -235,6 +249,7 @@ namespace NGT {
 
     virtual void linearSearch(Object &query, double radius, size_t size,
 			      ObjectSpace::ResultSet &results) = 0;
+    virtual std::pair<float, float> getMaxMin(float cut = 0.01, size_t size = 0) = 0;
     virtual const std::type_info &getObjectType() = 0;
     virtual void show(std::ostream &os, Object &object) = 0;
     virtual size_t getSize() = 0;
@@ -265,20 +280,18 @@ namespace NGT {
 #endif
     virtual std::vector<float> getObject(Object &object) = 0;
     virtual void getObjects(const std::vector<size_t> &idxs, std::vector<std::vector<float>> &vs) = 0;
-#ifdef NGT_INNER_PRODUCT
     virtual float computeMaxMagnitude(ObjectID beginId) = 0;
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
     virtual void setMagnitude(float maxMag, NGT::PersistentRepository<void> &graphNodes, NGT::ObjectID beginID) = 0;
 #else
     virtual void setMagnitude(float maxMag, NGT::Repository<void> &graphNodes, ObjectID beginId) = 0;
-#endif
 #endif
     DistanceType getDistanceType() { return distanceType; }
     size_t getDimension() { return dimension; }
     size_t getPaddedDimension() { return ((dimension - 1) / 16 + 1) * 16; }
 
     template <typename T>
-    void normalize(T *data, size_t dim) {
+    static void normalize(T *data, size_t dim) {
       float sum = 0.0;
       for (size_t i = 0; i < dim; i++) {
         sum += static_cast<float>(data[i]) * static_cast<float>(data[i]);
@@ -292,7 +305,8 @@ namespace NGT {
 	  }
 	}
 	std::stringstream msg;
-	msg << "ObjectSpace::normalize: Error! the object is an invalid zero vector for the cosine similarity.";
+	msg << "ObjectSpace::normalize: Error! the object is an invalid zero vector for the cosine similarity. "
+	    << typeid(T).name() << ".";
 	NGTThrowException(msg);
       }
       sum = sqrt(sum);
@@ -300,6 +314,12 @@ namespace NGT {
         data[i] = static_cast<float>(data[i]) / sum;
       }
     }
+
+    template<typename OTYPE>
+    static void normalize(std::vector<OTYPE> &object) {  // vector convenience overload of the pointer-based normalize
+      ObjectSpace::normalize(object.data(), object.size());  // normalizes in place over the whole vector
+    }
+
     int32_t getPrefetchOffset() { return prefetchOffset; }
     int32_t setPrefetchOffset(int offset) {
       if (offset > 0) {
@@ -321,12 +341,154 @@ namespace NGT {
       return prefetchSize;
     }
 
-    bool isNormalizedDistance() {
-      return (getDistanceType() == ObjectSpace::DistanceTypeNormalizedAngle) ||
-	     (getDistanceType() == ObjectSpace::DistanceTypeNormalizedCosine) ||
-	     (getDistanceType() == ObjectSpace::DistanceTypeNormalizedL2);
+    bool quantizationIsEnabled() { return quantizationScale != 0.0; }  // a scale of exactly 0.0 is treated as "not enabled"
+    void setQuantization(float scale, float offset) {  // stores the scalar quantization parameters on this object space
+      quantizationScale = scale;
+      quantizationOffset = offset;
+    }
+    std::pair<float, float> getQuantization() {  // returns (scale, offset), in that order
+      return std::make_pair(quantizationScale, quantizationOffset);
+    }
+
+    template<typename T> static void quantizeSymmetrically(T *vector, size_t dim, float max, float scale) {  // in place: each element becomes a rounded code clamped to [-max, max]
+      auto fmax = max + 0.5;  // double, because 0.5 is a double literal
+      for (size_t i = 0; i < dim; i++) {
+	float fv = static_cast<float>(vector[i]);
+        fv = std::round(fv / scale * fmax);  // NOTE(review): scale == 0 divides by zero here — confirm callers never pass 0
+	fv = fv < -max ? -max : fv;  // clamp from below
+	fv = fv > max ? max : fv;  // clamp from above
+	vector[i] = static_cast<T>(fv);
+      }
+    }
+
+    template<typename T> static void quantizeSymmetrically(std::vector<T> &vector, float max, float scale) {  // vector overload: quantizes every element in place
+      quantizeSymmetrically(vector.data(), vector.size(), max, scale);
+    }
+
+    template<typename T> static void dequantizeSymmetrically(T *vector, int8_t *cvector, size_t dimension, float max, float scale) {  // writes code / (max + 0.5) * scale for each of the dimension codes
+      auto fmax = max + 0.5;  // same (max + 0.5) factor that quantizeSymmetrically multiplies by
+      for (size_t i = 0; i < dimension; i++) {
+        float fv = static_cast<float>(cvector[i]);  // cvector is only read
+        fv = (fv / fmax) * scale;
+        vector[i] = static_cast<T>(fv);
+      }
+    }
+
+    template<typename T> static void dequantizeSymmetrically(std::vector<T> &vector, int8_t *cvector, size_t dimension, float max, float scale) {  // vector overload
+      vector.resize(dimension);  // the output always ends up with exactly dimension elements
+      dequantizeSymmetrically(vector.data(), cvector, dimension, max, scale);
+    }
+
+    template<typename T> static void quantize(T *vector, size_t dim, float max, float offset, float scale) {  // in place: each element becomes a floored code clamped to [0, max]
+      auto fmax = max + 1.0;  // double, because 1.0 is a double literal
+      for (size_t i = 0; i < dim; i++) {
+	float fv = static_cast<float>(vector[i]);
+	fv = floorf((fv - offset) / scale * fmax);  // NOTE(review): scale == 0 divides by zero here — confirm callers never pass 0
+	fv = fv < 0 ? 0 : fv;  // clamp from below
+	fv = fv > max ? max : fv;  // clamp from above
+	vector[i] = static_cast<T>(fv);
+      }
+    }
+    
+    template<typename T> static void quantize(std::vector<T> &vector, float max, float offset, float scale) {  // vector overload: quantizes every element in place
+      quantize(vector.data(), vector.size(), max, offset, scale);
     }
 
+    template<typename T> static void dequantize(T *vector, uint8_t *cvector, size_t dimension, float max, float offset, float scale) {  // writes (code + 0.5) / (max + 1) * scale + offset per element
+      auto fmax = max + 1.0;
+      for (size_t i = 0; i < dimension; i++) {
+        float fv = static_cast<float>(cvector[i]) + 0.5;  // 0.5 is added to the code before scaling
+        fv = (fv / fmax) * scale + offset;
+        vector[i] = static_cast<T>(fv);
+      }
+    }
+    
+    template<typename T> static void dequantize(std::vector<T> &vector, uint8_t *cvector, size_t dimension, float max, float offset, float scale) {  // vector overload
+      vector.resize(dimension);  // the output always ends up with exactly dimension elements
+      dequantize(vector.data(), cvector, vector.size(), max, offset, scale);
+    }
+
+    template<typename T> void quantizeToQint8(std::vector<T> &vector, float offset, float scale, bool shift = false) {  // member overload with explicit offset/scale
+      quantizeToQint8(vector, getObjectType(), getDimension(), offset, scale, shift);  // supplies this space's object type and dimension to the static overload
+    }
+    
+    // Quantizes vector in place to codes in [-127, 127]; shift adds 127 to at most `dimension` elements, never past the end.
+    template<typename T> static void quantizeToQint8(std::vector<T> &vector, const std::type_info &t, size_t dimension,
+						     float offset, float scale, bool shift = false) {
+      if (t == typeid(qsint8)) {
+	quantizeSymmetrically(vector, 127.0, scale);
+	// dimension is passed separately from the vector, so clamp it to the vector length before indexing.
+	const size_t n = dimension < vector.size() ? dimension : vector.size();
+	for (size_t i = 0; shift && i < n; i++) {
+	  vector[i] += 127;
+	}
+      } else {
+	std::stringstream msg;
+	msg << "not supported type. " << t.name();
+	NGTThrowException(msg);
+      }
+    }
+    template<typename T> void quantizeToQint8(std::vector<T> &vector, bool shift = false) {  // member overload using the stored quantization parameters
+      if (quantizationOffset == 0.0 && quantizationScale == 0.0) {  // throws only when both are zero; NOTE(review): a zero scale with a non-zero offset passes this check — confirm intended
+	NGTThrowException("Error. Quantization parameters are not set yet.");
+      }
+      quantizeToQint8(vector, quantizationOffset, quantizationScale, shift);
+    }
+    static void quantizeToQint8(float *vector, size_t dimension, uint8_t *cvector,  // writes dimension codes to cvector; only Qsuint8 is accepted
+				ObjectType type,
+				float offset, float scale, bool shift = false) {  // offset is not used in this body
+      if (type == Qsuint8) {
+	quantizeSymmetrically(vector, dimension, 127.0, scale);  // note: this overwrites the caller's float buffer with the rounded codes
+	if (shift) {
+	  auto *cv = reinterpret_cast<uint8_t*>(cvector);
+	  for (size_t i = 0; i < dimension; i++) {
+	    cv[i] = static_cast<uint8_t>(vector[i] + 127);  // shifted codes are stored as unsigned bytes (code + 127)
+	  }
+	} else {
+	  auto *cv = reinterpret_cast<int8_t*>(cvector);
+	  for (size_t i = 0; i < dimension; i++) {
+	    cv[i] = static_cast<int8_t>(vector[i]);  // unshifted codes are stored as signed bytes
+	  }
+	}
+      } else {
+	std::stringstream msg;
+	msg << "not supported type. " << type;
+	NGTThrowException(msg);
+      }
+    }
+    template<typename T> void dequantizeFromQint8(std::vector<T> &vector, uint8_t *cvector,  // member overload using the stored quantization parameters
+						  bool shift = false) {
+      dequantizeFromQint8(vector, cvector, dimension, getObjectType(), quantizationOffset,  // supplies this space's dimension and object type to the static overload
+			  quantizationScale, shift);
+    }
+    // Decodes `dimension` qsint8 codes from cvector into vector (resized to dimension). cvector is only
+    // read. offset is unused by the symmetric scheme. Throws for any type other than qsint8.
+    template<typename T> static void dequantizeFromQint8(std::vector<T> &vector, uint8_t *cvector, size_t dimension,
+							 const std::type_info &t,
+							 float offset, float scale, bool shift = false) {
+      if (t == typeid(qsint8)) {
+	if (shift) {
+	  // Shifted codes are stored as code + 127 (see quantizeToQint8); undo the shift on a copy.
+	  std::vector<int8_t> codes(dimension);
+	  for (size_t i = 0; i < dimension; i++) {
+	    codes[i] = static_cast<int8_t>(static_cast<int>(cvector[i]) - 127);
+	  }
+	  dequantizeSymmetrically(vector, codes.data(), dimension, 127.0, scale);
+	} else {
+	  dequantizeSymmetrically(vector, reinterpret_cast<int8_t*>(cvector), dimension, 127.0, scale);
+	}
+      } else {
+	std::stringstream msg;
+	msg << "not supported type. " << t.name();
+	NGTThrowException(msg);
+      }
+    }
+    bool isQintObjectType() {
+      // True only when this space's object type is qsint8.
+      const bool isQsint8 = getObjectType() == typeid(qsint8);
+      return isQsint8;
+    }
+    bool isNormalizedDistance() { return normalization; }  // now reports the normalization flag instead of comparing the distance type
+
     NGT::Distance compareWithL1(NGT::Object &o1, NGT::Object &o2);
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
     NGT::Distance compareWithL1(NGT::Object &o1, NGT::PersistentObject &o2);
@@ -336,9 +498,13 @@ namespace NGT {
     const size_t	dimension;
     DistanceType	distanceType;
     Comparator		*comparator;
+    Comparator		*comparatorForSearch;
     bool		normalization;
     int32_t		prefetchOffset;
     int32_t		prefetchSize;
+    float		quantizationScale;
+    float		quantizationOffset;
+    float		magnitude;
   };
 
   class BaseObject {
@@ -360,7 +526,7 @@ namespace NGT {
       NGT::Serializer::read(is, (uint8_t*)&(*this)[0], byteSize);
       if (is.eof()) {
 	std::stringstream msg;
-	msg << "ObjectSpace::BaseObject: Fatal Error! Read beyond the end of the object file. The object file is corrupted?" << byteSize;
+	msg << "ObjectSpace::BaseObject: Fatal Error! Read beyond the end of the object file. The object file is corrupted? " << byteSize;
 	NGTThrowException(msg);
       }
     }
@@ -373,6 +539,8 @@ namespace NGT {
       void *ref = (void*)&(*this)[0];
       if (t == typeid(uint8_t)) {
 	NGT::Serializer::writeAsText(os, (uint8_t*)ref, dimension);
+      } else if (t == typeid(qsint8)) {
+	NGT::Serializer::writeAsText(os, (int8_t*)ref, dimension);
       } else if (t == typeid(float)) {
 	NGT::Serializer::writeAsText(os, (float*)ref, dimension);
 #ifdef NGT_HALF_FLOAT
@@ -429,6 +597,10 @@ namespace NGT {
 	for (size_t d = 0; d < dimension; d++) {
 	  *(static_cast<uint8_t*>(ref) + d) = v[d];
 	}
+      } else if (t == typeid(qsint8)) {
+	for (size_t d = 0; d < dimension; d++) {
+	  *(static_cast<int8_t*>(ref) + d) = v[d];
+	}
       } else if (t == typeid(float)) {
 	for (size_t d = 0; d < dimension; d++) {
 	  *(static_cast<float*>(ref) + d) = v[d];
@@ -510,7 +682,7 @@ namespace NGT {
 
     void construct(size_t s) {
       assert(vector == 0);
-      size_t allocsize = ((s - 1) / 64 + 1) * 64;
+      size_t allocsize = ((s - 1) / 64 + 1) * 64;	
       vector = static_cast<uint8_t*>(MemoryCache::alignedAlloc(allocsize));
       memset(vector, 0, allocsize);
     }
@@ -582,7 +754,7 @@ namespace NGT {
     void construct(size_t s, SharedMemoryAllocator &allocator) {
       assert(array == 0);
       assert(s != 0);
-      size_t allocsize = ((s - 1) / 64 + 1) * 64;
+      size_t allocsize = ((s - 1) / 64 + 1) * 64;	
       array = allocator.getOffset(new(allocator) uint8_t[allocsize]);
       memset(getPointer(0, allocator), 0, allocsize);
     }
diff --git a/lib/NGT/ObjectSpaceRepository.h b/lib/NGT/ObjectSpaceRepository.h
index 9c636a6..575b493 100644
--- a/lib/NGT/ObjectSpaceRepository.h
+++ b/lib/NGT/ObjectSpaceRepository.h
@@ -288,30 +288,90 @@ namespace NGT {
 #endif
     };
 
-#ifdef NGT_INNER_PRODUCT
     class ComparatorInnerProduct : public Comparator {
       public:
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
         ComparatorInnerProduct(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {}
         double operator()(Object &objecta, Object &objectb) {
-	  return PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension);
+	  return -PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension);
 	}
 	double operator()(Object &objecta, PersistentObject &objectb) {
-	  return PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb.at(0, allocator), dimension);
+	  return -PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb.at(0, allocator), dimension);
         }
 	double operator()(PersistentObject &objecta, PersistentObject &objectb) {
-          return PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta.at(0, allocator), (OBJECT_TYPE*)&objectb.at(0, allocator), dimension);
+          return -PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta.at(0, allocator), (OBJECT_TYPE*)&objectb.at(0, allocator), dimension);
         }
 #else
         ComparatorInnerProduct(size_t d) : Comparator(d) {}
 	double operator()(Object &objecta, Object &objectb) {
-          return PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension);
+          auto d = PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension);
+	  return -d;
         }
 #endif
     };
+    class ComparatorInnerProductQsint8Quint8 : public Comparator {
+      public:
+#ifdef NGT_SHARED_MEMORY_ALLOCATOR
+        ComparatorInnerProductQsint8Quint8(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {}
+        double operator()(Object &objecta, Object &objectb) {
+	  return PrimitiveComparator::InnerProductQsint8::compare(&objecta[0], &objectb[0], dimension);
+	}
+	double operator()(Object &objecta, PersistentObject &objectb) {
+	  return PrimitiveComparator::InnerProductQsint8::compare(&objecta[0], &objectb.at(0, allocator), dimension);
+        }
+	double operator()(PersistentObject &objecta, PersistentObject &objectb) {
+          return PrimitiveComparator::InnerProductQsint8::compare(&objecta.at(0, allocator), &objectb.at(0, allocator), dimension);
+        }
+#else
+        ComparatorInnerProductQsint8Quint8(size_t d) : Comparator(d) {}
+	double operator()(Object &objecta, Object &objectb) {
+	  return PrimitiveComparator::InnerProductQsint8::compare(&objecta[0], &objectb[0], dimension);
+        }
+#endif
+    };
+    class ComparatorL2Quint8Quint8 : public Comparator {
+      public:
+#ifdef NGT_SHARED_MEMORY_ALLOCATOR
+        ComparatorL2Quint8Quint8(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {}
+	double operator()(Object &objecta, Object &objectb) {
+	  return PrimitiveComparator::compareL2((quint8*)&objecta[0], (quint8*)&objectb[0], dimension);
+	}
+	double operator()(Object &objecta, PersistentObject &objectb) {
+	  return PrimitiveComparator::compareL2((quint8*)&objecta[0], (quint8*)&objectb.at(0, allocator), dimension);
+	}
+	double operator()(PersistentObject &objecta, PersistentObject &objectb) {
+	  return PrimitiveComparator::compareL2((quint8*)&objecta.at(0, allocator), (quint8*)&objectb.at(0, allocator), dimension);
+	}
+#else
+        ComparatorL2Quint8Quint8(size_t d) : Comparator(d) {}
+	double operator()(Object &objecta, Object &objectb) {
+	  return PrimitiveComparator::compareL2((quint8*)&objecta[0], (quint8*)&objectb[0], dimension);
+	}
+#endif
+    };
+    class ComparatorDotProduct : public Comparator {
+      public:
+#ifdef NGT_SHARED_MEMORY_ALLOCATOR
+        ComparatorDotProduct(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {}
+        double operator()(Object &objecta, Object &objectb) {
+	  return magnitude - PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension);
+	}
+	double operator()(Object &objecta, PersistentObject &objectb) {
+	  return magnitude - PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb.at(0, allocator), dimension);
+        }
+	double operator()(PersistentObject &objecta, PersistentObject &objectb) {
+          return magnitude - PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta.at(0, allocator), (OBJECT_TYPE*)&objectb.at(0, allocator), dimension);
+        }
+#else
+        ComparatorDotProduct(size_t d) : Comparator(d) {}
+	double operator()(Object &objecta, Object &objectb) {
+	  return magnitude - PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension);
+	}
 #endif
+        float magnitude;
+    };
 
-    ObjectSpaceRepository(size_t d, const std::type_info &ot, DistanceType t) : ObjectSpace(d), ObjectRepository(d, ot) {
+    ObjectSpaceRepository(size_t d, const std::type_info &ot, DistanceType t, float mag = -1) : ObjectSpace(d), ObjectRepository(d, ot) {
      size_t objectSize = 0;
      if (ot == typeid(uint8_t)) {
        objectSize = sizeof(uint8_t);
@@ -321,6 +381,8 @@ namespace NGT {
      } else if (ot == typeid(float16)) {
        objectSize = sizeof(float16);
 #endif
+     } else if (ot == typeid(qsint8)) {
+       objectSize = sizeof(qsint8);
 #ifdef NGT_BFLOAT
      } else if (ot == typeid(bfloat16)) {
        objectSize = sizeof(bfloat16);
@@ -332,8 +394,9 @@ namespace NGT {
      }
      setLength(objectSize * d);
      setPaddedLength(objectSize * ObjectSpace::getPaddedDimension());
+     magnitude = mag;
      setDistanceType(t);
-   }
+    }
 
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
     void open(const std::string &f, size_t sharedMemorySize) { ObjectRepository::open(f, sharedMemorySize); }
@@ -390,7 +453,12 @@ namespace NGT {
     void setDistanceType(DistanceType t) {
       if (comparator != 0) {
 	delete comparator;
-      }
+	comparator = 0;
+      } 
+      if (comparatorForSearch != 0) {
+	delete comparatorForSearch;
+	comparatorForSearch = 0;
+      } 
       assert(ObjectSpace::dimension != 0);
       distanceType = t;
       switch (distanceType) {
@@ -435,12 +503,25 @@ namespace NGT {
 	comparator = new ObjectSpaceRepository::ComparatorNormalizedCosineSimilarity(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator);
 	normalization = true;
 	break;
-#ifdef NGT_INNER_PRODUCT
       case DistanceTypeInnerProduct:
-	comparator = new ObjectSpaceRepository::ComparatorL2(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator);
-	setInnerProduct();
+	{
+	  if (typeid(OBJECT_TYPE) == typeid(qsint8)) {
+	    comparator = new ObjectSpaceRepository::ComparatorL2Quint8Quint8(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator);
+	    comparatorForSearch = new ObjectSpaceRepository::ComparatorInnerProductQsint8Quint8(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator);
+	  } else {
+	    comparator = new ObjectSpaceRepository::ComparatorL2(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator);
+	  }
+	  setInnerProduct();
+	}
+        break;
+      case DistanceTypeDotProduct:
+	{
+	  auto *comp = new ObjectSpaceRepository::ComparatorDotProduct(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator);
+	  comp->magnitude = magnitude;
+	  comparator = comp;
+	  setInnerProduct();
+	}
         break;
-#endif
 #else
       case DistanceTypeL1:
 	comparator = new ObjectSpaceRepository::ComparatorL1(ObjectSpace::getPaddedDimension());
@@ -482,12 +563,25 @@ namespace NGT {
 	comparator = new ObjectSpaceRepository::ComparatorNormalizedCosineSimilarity(ObjectSpace::getPaddedDimension());
 	normalization = true;
 	break;
-#ifdef NGT_INNER_PRODUCT
       case DistanceTypeInnerProduct:
-	comparator = new ObjectSpaceRepository::ComparatorL2(ObjectSpace::getPaddedDimension());
-	setInnerProduct();
+	{
+	  if (typeid(OBJECT_TYPE) == typeid(qsint8)) {
+	    comparator = new ObjectSpaceRepository::ComparatorL2Quint8Quint8(ObjectSpace::getPaddedDimension());
+	    comparatorForSearch = new ObjectSpaceRepository::ComparatorInnerProductQsint8Quint8(ObjectSpace::getPaddedDimension());
+	  } else {
+	    comparator = new ObjectSpaceRepository::ComparatorL2(ObjectSpace::getPaddedDimension());
+	  }
+	  setInnerProduct();
+	}
+        break;
+      case DistanceTypeDotProduct:
+	{
+	  auto *comp = new ObjectSpaceRepository::ComparatorDotProduct(ObjectSpace::getPaddedDimension());
+	  comp->magnitude = magnitude;
+	  comparator = comp;
+	  setInnerProduct();
+	}
         break;
-#endif
 #endif
       default:
         std::stringstream msg;
@@ -511,6 +605,7 @@ namespace NGT {
     void append(const float16 *data, size_t dataSize) { ObjectRepository::append(data, dataSize); }
 #endif
 
+    void deleteAll() { ObjectRepository::deleteAll(); }
 
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
     PersistentObject *allocatePersistentObject(Object &obj) {
@@ -524,6 +619,34 @@ namespace NGT {
     void remove(size_t id) { ObjectRepository::remove(id); }
 
     void linearSearch(Object &query, double radius, size_t size, ObjectSpace::ResultSet &results) {
+      if (distanceType == DistanceTypeInnerProduct) {
+	Comparator *comp = 0; // temporary inner-product comparator, created and destroyed by this call
+	if (typeid(OBJECT_TYPE) == typeid(qsint8)) {
+#ifdef NGT_SHARED_MEMORY_ALLOCATOR
+	  comp = new ObjectSpaceRepository::ComparatorInnerProductQsint8Quint8(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator);
+#else
+	  comp = new ObjectSpaceRepository::ComparatorInnerProductQsint8Quint8(ObjectSpace::getPaddedDimension());
+#endif
+	} else {
+#ifdef NGT_SHARED_MEMORY_ALLOCATOR
+	  comp = new ObjectSpaceRepository::ComparatorInnerProduct(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator);
+#else
+	  comp = new ObjectSpaceRepository::ComparatorInnerProduct(ObjectSpace::getPaddedDimension());
+#endif
+	}
+	try {
+	  linearSearch(query, radius, size, results, *comp);
+	} catch(...) { // free the comparator whatever was thrown, not only for NGT exceptions
+	  delete comp;
+	  throw; // rethrow the in-flight exception itself: no copy and no slicing
+	}
+	delete comp;
+      } else {
+	linearSearch(query, radius, size, results, *comparator); // every other distance type uses the configured comparator
+      }
+    }
+    void linearSearch(Object &query, double radius, size_t size, ObjectSpace::ResultSet &results,
+		      Comparator &comparator) {
       if (!results.empty()) {
 	NGTThrowException("lenearSearch: results is not empty");
       }
@@ -546,9 +669,9 @@ namespace NGT {
 	  continue;
 	}
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
-	Distance d = (*comparator)((Object&)query, (PersistentObject&)*rep[idx]);
+	Distance d = comparator((Object&)query, (PersistentObject&)*rep[idx]);
 #else
-	Distance d = (*comparator)((Object&)query, (Object&)*rep[idx]);
+	Distance d = comparator((Object&)query, (Object&)*rep[idx]);
 #endif
 	if (radius < 0.0 || d <= radius) {
 	  NGT::ObjectDistance obj(idx, d);
@@ -561,7 +684,6 @@ namespace NGT {
       return;
     }
 
-#ifdef NGT_INNER_PRODUCT
     float computeMaxMagnitude(NGT::ObjectID beginID = 1) {
       float maxMag = 0.0;
       ObjectRepository &rep = *this;
@@ -594,7 +716,6 @@ namespace NGT {
       return maxMag;
     }
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
-    //void setMagnitude(float maxMag, NGT::Vector<off_t> &graphNodes, NGT::ObjectID beginID = 1) {
   void setMagnitude(float maxMag, NGT::PersistentRepository<void> &graphNodes, NGT::ObjectID beginID = 1) {
 #else
   void setMagnitude(float maxMag, NGT::Repository<void> &graphNodes, NGT::ObjectID beginID = 1) {
@@ -630,8 +751,43 @@ namespace NGT {
 #endif
       }
     }
-#endif
 
+    // Returns {upper, lower} clipping bounds over the element values of the stored objects:
+    // upper is the smallest of the largest values kept, lower is the largest of the smallest
+    // values kept. Fails via NGTThrowException when no value could be read at all.
+    // NOTE(review): a non-zero size is raised to rep.size(), never lowered -- confirm intent.
+    std::pair<float, float> getMaxMin(float clippingRate = 0.02, size_t size = 0) {
+      ObjectRepository &rep = *this;
+      size = size > rep.size() ? size : rep.size(); // size == 0 ends up as rep.size() as well
+      auto dim = getDimension();
+      auto clippingSize = static_cast<float>(size) * clippingRate;
+      clippingSize = clippingSize == 0 ? 1 : clippingSize;
+      std::priority_queue<float> min; // max-heap: keeps the smallest values seen so far
+      std::priority_queue<float, std::vector<float>, std::greater<float>> max; // min-heap: keeps the largest values seen so far
+      std::cerr << "repo size=" << rep.size() << " " << clippingSize << std::endl;
+      for (size_t idx = 1; idx < rep.size(); idx++) {
+	try {
+	  OBJECT_TYPE *obj = static_cast<OBJECT_TYPE*>(getObject(idx));
+	  for (size_t i = 0; i < dim; i++) {
+	    float v = static_cast<float>(obj[i]);
+	    if (max.size() < clippingSize) {
+	      max.push(v);
+	    } else if (max.top() <= v) {
+	      max.push(v);
+	      max.pop();
+	    }
+	    if (min.size() < clippingSize) {
+	      min.push(v);
+	    } else if (min.top() >= v) {
+	      min.push(v);
+	      min.pop();
+	    }
+	  }
+	} catch(...) {} // best effort: an ID whose object cannot be fetched is skipped
+      }
+      if (max.empty() || min.empty()) { NGTThrowException("getMaxMin: no objects to compute the range from."); } // top() on an empty queue is undefined
+      return std::make_pair(max.top(), min.top());
+    }
 
     void *getObject(size_t idx) {
       if (isEmpty(idx)) {
@@ -688,12 +844,12 @@ namespace NGT {
 
 #ifdef NGT_SHARED_MEMORY_ALLOCATOR
     void normalize(PersistentObject &object) {
-      OBJECT_TYPE *obj = (OBJECT_TYPE*)&object.at(0, getRepository().getAllocator());
+      auto *obj = reinterpret_cast<OBJECT_TYPE*>(object.getPointer(getRepository().getAllocator()));
       ObjectSpace::normalize(obj, ObjectSpace::dimension);
     }
 #endif
     void normalize(Object &object) {
-      OBJECT_TYPE *obj = (OBJECT_TYPE*)&object[0];
+      auto *obj = reinterpret_cast<OBJECT_TYPE*>(object.getPointer());
       ObjectSpace::normalize(obj, ObjectSpace::dimension);
     }
 
@@ -709,71 +865,177 @@ namespace NGT {
     }
 
     Object *allocateNormalizedObject(const std::vector<double> &obj) {
-      Object *allocatedObject = ObjectRepository::allocateObject(obj);
-      if (normalization) {
-	normalize(*allocatedObject);
+      Object *allocatedObject = 0;
+      if (quantizationIsEnabled()) {
+	std::vector<float> qobj(obj.begin(), obj.end());
+	if (normalization) {
+	  ObjectSpace::normalize(qobj);
+	}
+	quantizeToQint8(qobj);
+	allocatedObject = ObjectRepository::allocateObject(qobj);
+      } else {
+	allocatedObject = ObjectRepository::allocateObject(obj);
+	if (normalization) {
+	  normalize(*allocatedObject);
+	}
       }
       return allocatedObject;
     }
     Object *allocateNormalizedObject(const std::vector<float> &obj) {
-      Object *allocatedObject = ObjectRepository::allocateObject(obj);
-      if (normalization) {
-	normalize(*allocatedObject);
+      Object *allocatedObject = 0;
+      if (quantizationIsEnabled()) {
+	std::vector<float> qobj(obj.begin(), obj.end());
+	if (normalization) {
+	  ObjectSpace::normalize(qobj);
+	}
+	quantizeToQint8(qobj);
+	allocatedObject = ObjectRepository::allocateObject(qobj);
+      } else {
+	allocatedObject = ObjectRepository::allocateObject(obj);
+	if (normalization) {
+	  normalize(*allocatedObject);
+	}
       }
       return allocatedObject;
     }
 #ifdef NGT_HALF_FLOAT
     Object *allocateNormalizedObject(const std::vector<float16> &obj) {
-      Object *allocatedObject = ObjectRepository::allocateObject(obj);
-      if (normalization) {
-	normalize(*allocatedObject);
+      Object *allocatedObject = 0;
+      if (quantizationIsEnabled()) {
+	std::vector<float> qobj(obj.begin(), obj.end());
+	if (normalization) {
+	  ObjectSpace::normalize(qobj);
+	}
+	quantizeToQint8(qobj);
+	allocatedObject = ObjectRepository::allocateObject(qobj);
+      } else {
+	allocatedObject = ObjectRepository::allocateObject(obj);
+	if (normalization) {
+	  normalize(*allocatedObject);
+	}
       }
       return allocatedObject;
     }
 #endif
     Object *allocateNormalizedObject(const std::vector<uint8_t> &obj) {
-      Object *allocatedObject = ObjectRepository::allocateObject(obj);
-      if (normalization) {
-	normalize(*allocatedObject);
+      Object *allocatedObject = 0;
+      if (quantizationIsEnabled()) {
+	std::vector<float> qobj(obj.begin(), obj.end());
+	if (normalization) {
+	  ObjectSpace::normalize(qobj);
+	}
+	quantizeToQint8(qobj);
+	allocatedObject = ObjectRepository::allocateObject(qobj);
+      } else {
+	allocatedObject = ObjectRepository::allocateObject(obj);
+	if (normalization) {
+	  normalize(*allocatedObject);
+	}
       }
       return allocatedObject;
     }
 
     Object *allocateNormalizedObject(const float *obj, size_t size) {
-      Object *allocatedObject = ObjectRepository::allocateObject(obj, size);
-      if (normalization) {
-	normalize(*allocatedObject);
+      Object *allocatedObject = 0;
+      try {
+	if (quantizationIsEnabled()) {
+	  std::vector<float> qobj(obj, obj + size);
+	  if (normalization) {
+	    ObjectSpace::normalize(qobj);
+	  }
+	  quantizeToQint8(qobj);
+	  allocatedObject = ObjectRepository::allocateObject(qobj);
+	} else {
+	  allocatedObject = ObjectRepository::allocateObject(obj, size);
+	  if (normalization) {
+	    normalize(*allocatedObject);
+	  }
+	}
+      } catch (Exception &err) {
+	std::stringstream msg;
+	msg << err.what() << " quantization=" << (quantizationIsEnabled() ? "True" : "False");
+	NGTThrowException(msg);
       }
       return allocatedObject;
     }
 
     PersistentObject *allocateNormalizedPersistentObject(const std::vector<double> &obj) {
-      PersistentObject *allocatedObject = ObjectRepository::allocatePersistentObject(obj);
-      if (normalization) {
-	normalize(*allocatedObject);
+      PersistentObject *allocatedObject = 0;
+      if (quantizationIsEnabled()) {
+	std::vector<float> qobj(obj.begin(), obj.end());
+	if (normalization) {
+	  ObjectSpace::normalize(qobj);
+	}
+	auto shift = distanceType == DistanceTypeInnerProduct && typeid(OBJECT_TYPE) == typeid(qsint8);
+	quantizeToQint8(qobj, shift);
+	allocatedObject = ObjectRepository::allocatePersistentObject(qobj);
+      } else {
+	allocatedObject = ObjectRepository::allocatePersistentObject(obj);
+	if (normalization) {
+	  normalize(*allocatedObject);
+	}
       }
       return allocatedObject;
     }
     PersistentObject *allocateNormalizedPersistentObject(const std::vector<float> &obj) {
-      PersistentObject *allocatedObject = ObjectRepository::allocatePersistentObject(obj);
-      if (normalization) {
-	normalize(*allocatedObject);
+      PersistentObject *allocatedObject = 0;
+      try {
+	if (quantizationIsEnabled()) {
+	  std::vector<float> qobj(obj.begin(), obj.end());
+	  if (normalization) {
+	    ObjectSpace::normalize(qobj);
+	  }
+	  auto shift = distanceType == DistanceTypeInnerProduct && typeid(OBJECT_TYPE) == typeid(qsint8);
+	  quantizeToQint8(qobj, shift);
+	  allocatedObject = ObjectRepository::allocatePersistentObject(qobj);
+	} else {
+	  allocatedObject = ObjectRepository::allocatePersistentObject(obj);
+	  if (normalization) {
+	    normalize(*allocatedObject);
+	  }
+	}
+      } catch (Exception &err) {
+	std::stringstream msg;
+	msg << err.what() << " quantization=" << (quantizationIsEnabled() ? "True" : "False");
+	NGTThrowException(msg);
       }
       return allocatedObject;
     }
 #ifdef NGT_HALF_FLOAT
     PersistentObject *allocateNormalizedPersistentObject(const std::vector<float16> &obj) {
-      PersistentObject *allocatedObject = ObjectRepository::allocatePersistentObject(obj);
-      if (normalization) {
-	normalize(*allocatedObject);
+      PersistentObject *allocatedObject = 0;
+      if (quantizationIsEnabled()) {
+	std::vector<float> qobj(obj.begin(), obj.end());
+	if (normalization) {
+	  ObjectSpace::normalize(qobj);
+	}
+	auto shift = distanceType == DistanceTypeInnerProduct && typeid(OBJECT_TYPE) == typeid(qsint8);
+	quantizeToQint8(qobj, shift);
+	allocatedObject = ObjectRepository::allocatePersistentObject(qobj);
+      } else {
+	allocatedObject = ObjectRepository::allocatePersistentObject(obj);
+	if (normalization) {
+	  normalize(*allocatedObject);
+	}
       }
       return allocatedObject;
     }
 #endif
     PersistentObject *allocateNormalizedPersistentObject(const std::vector<uint8_t> &obj) {
-      PersistentObject *allocatedObject = ObjectRepository::allocatePersistentObject(obj);
-      if (normalization) {
-	normalize(*allocatedObject);
+      PersistentObject *allocatedObject = 0;
+      if (quantizationIsEnabled()) {
+	std::vector<float> qobj(obj.begin(), obj.end());
+	if (normalization) {
+	  ObjectSpace::normalize(qobj);
+	}
+	auto shift = distanceType == DistanceTypeInnerProduct && typeid(OBJECT_TYPE) == typeid(qsint8);
+	quantizeToQint8(qobj, shift);
+	allocatedObject = ObjectRepository::allocatePersistentObject(qobj);
+      } else {
+	allocatedObject = ObjectRepository::allocatePersistentObject(obj);
+	if (normalization) {
+	  normalize(*allocatedObject);
+	}
       }
       return allocatedObject;
     }
@@ -833,6 +1095,8 @@ namespace NGT {
     size_t dimension = objectspace->getDimension();
     if (t == typeid(uint8_t)) {
       NGT::Serializer::writeAsText(os, (uint8_t*)ref, dimension);
+    } else if (t == typeid(qsint8)) {
+      NGT::Serializer::writeAsText(os, (int8_t*)ref, dimension);
     } else if (t == typeid(float)) {
       NGT::Serializer::writeAsText(os, (float*)ref, dimension);
 #ifdef NGT_HALF_FLOAT
diff --git a/lib/NGT/Optimizer.h b/lib/NGT/Optimizer.h
index 4f1afe3..610e357 100644
--- a/lib/NGT/Optimizer.h
+++ b/lib/NGT/Optimizer.h
@@ -262,7 +262,7 @@ namespace NGT {
 	NGT::Common::tokenize(line, result, " \t");
 	if (result.size() < 3) {
 	  std::stringstream msg;
-	  msg << "result format is wrong. ";
+	  msg << "result format is wrong. [" << line << "]";
 	  NGTThrowException(msg);
 	}
 	size_t id = NGT::Common::strtol(result[1]);
@@ -320,7 +320,7 @@ namespace NGT {
 	      NGT::Common::tokenize(line, result, " \t");
 	      if (result.size() < 3) {
 		std::stringstream msg;
-		msg << "result format is wrong. ";
+		msg << "result format is wrong. [" << line << "]";
 		NGTThrowException(msg);
 	      }
 	      size_t rank = NGT::Common::strtol(result[0]);
@@ -398,7 +398,7 @@ namespace NGT {
 		    double key;
 		    if (fluctuation != "") {
 		      key = NGT::Common::strtod(fluctuation);
-		      keyValue = "Factor (Epsilon or a fluctuating value)";
+		      keyValue = "Factor (Epsilon or any fluctuating value)";
 		    } else {
 		      std::stringstream msg;
 		      msg << "check: inner error! " << fluctuation;
@@ -480,8 +480,9 @@ namespace NGT {
 		std::vector<std::string> result;
 		NGT::Common::tokenize(line, result, " \t");
 		if (result.size() < 3) {
-		  std::cerr << "result format is wrong. " << std::endl;
-		  abort();
+		  std::stringstream msg;
+		  msg << "result format is wrong. [" << line << "]";
+		  NGTThrowException(msg);
 		}
 		size_t rank = NGT::Common::strtol(result[0]);
 		size_t id = NGT::Common::strtol(result[1]);
@@ -996,41 +997,18 @@ namespace NGT {
 
 
     void outputObject(std::ostream &os, std::vector<float> &v, NGT::Property &prop) {
-      switch (prop.objectType) {
-      case NGT::ObjectSpace::ObjectType::Uint8:
-	{
-	  for (auto i = v.begin(); i != v.end(); ++i) {
-	    int d = *i;
-	    os << d;
-	    if (i + 1 != v.end()) {
-	      os << "\t";
-	    }
-	  }
-	  os << std::endl;
+      for (auto i = v.begin(); i != v.end(); ++i) {
+	os << *i;
+	if (i + 1 != v.end()) {
+	  os << "\t";
 	}
-	break;
-      default:
-#ifdef NGT_HALF_FLOAT
-      case NGT::ObjectSpace::ObjectType::Float16:
-#endif
-      case NGT::ObjectSpace::ObjectType::Float:
-	{
-	  for (auto i = v.begin(); i != v.end(); ++i) {
-	    os << *i;
-	    if (i + 1 != v.end()) {
-	      os << "\t";
-	    }
-	  }
-	  os << std::endl;
-	}
-	break;
       }
+      os << std::endl;
     }
 
     void outputObjects(std::vector<std::vector<float>> &vs, std::ostream &os) {
       NGT::Property prop;
       index.getProperty(prop);
-
       for (auto i = vs.begin(); i != vs.end(); ++i) {
 	outputObject(os, *i, prop);
       }
@@ -1059,7 +1037,6 @@ namespace NGT {
 	}
 	break;
 #endif
-      default:
       case NGT::ObjectSpace::ObjectType::Float:
 	{
 	  auto *obj = static_cast<float*>(index.getObjectSpace().getObject(id));
@@ -1069,46 +1046,26 @@ namespace NGT {
 	  }
 	}
 	break;
+      case NGT::ObjectSpace::ObjectType::Qsuint8:
+	{
+	  auto *obj = static_cast<uint8_t*>(index.getObjectSpace().getObject(id));
+	  index.getObjectSpace().dequantizeFromQint8(v, obj);
+	}
+	break;
+      default:
+	std::stringstream msg;
+	msg << "Fatal error! Invalid object type. (" << prop.objectType << ")" << std::endl;
+	NGTThrowException(msg);
       }
       return v;
     }
 
     std::vector<float> meanObject(size_t id1, size_t id2, NGT::Property &prop) {
       std::vector<float> v;
-      switch (prop.objectType) {
-      case NGT::ObjectSpace::ObjectType::Uint8:
-	{
-	  auto *obj1 = static_cast<uint8_t*>(index.getObjectSpace().getObject(id1));
-	  auto *obj2 = static_cast<uint8_t*>(index.getObjectSpace().getObject(id2));
-	  for (int i = 0; i < prop.dimension; i++) {
-	    int d = (*obj1++ + *obj2++) / 2;
-	    v.push_back(d);
-	  }
-	}
-	break;
-#ifdef NGT_HALF_FLOAT
-      case NGT::ObjectSpace::ObjectType::Float16:
-	{
-	  auto *obj1 = static_cast<NGT::float16*>(index.getObjectSpace().getObject(id1));
-	  auto *obj2 = static_cast<NGT::float16*>(index.getObjectSpace().getObject(id2));
-	  for (int i = 0; i < prop.dimension; i++) {
-	    float d = (*obj1++ + *obj2++) / 2.0F;
-	    v.push_back(d);
-	  }
-	}
-	break;
-#endif
-      default:
-      case NGT::ObjectSpace::ObjectType::Float:
-	{
-	  auto *obj1 = static_cast<float*>(index.getObjectSpace().getObject(id1));
-	  auto *obj2 = static_cast<float*>(index.getObjectSpace().getObject(id2));
-	  for (int i = 0; i < prop.dimension; i++) {
-	    float d = (*obj1++ + *obj2++) / 2.0F;
-	    v.push_back(d);
-	  }
-	}
-	break;
+      auto obj1 = extractObject(id1, prop);
+      auto obj2 = extractObject(id2, prop);
+      for (int i = 0; i < prop.dimension; i++) {
+	v.emplace_back((obj1[i] + obj2[i]) / 2.0);
       }
       return v;
     }
@@ -1116,7 +1073,6 @@ namespace NGT {
     void extractQueries(std::vector<std::vector<float>> &queries, std::ostream &os) {
       NGT::Property prop;
       index.getProperty(prop);
-
       for (auto i = queries.begin(); i != queries.end(); ++i) {
 	outputObject(os, *i, prop);
       }
@@ -1518,7 +1474,6 @@ namespace NGT {
 
     static std::vector<std::pair<float, double>>
       generateAccuracyTable(NGT::Index &index, size_t nOfResults = 50, size_t querySize = 100) {
-
       NGT::Property prop;
       index.getProperty(prop);
       if (prop.edgeSizeForSearch != 0 && prop.edgeSizeForSearch != -2) {
@@ -1528,13 +1483,10 @@ namespace NGT {
       }
 
       NGT::Optimizer optimizer(index, nOfResults);
-
       float maxEpsilon = 0.0;
       std::stringstream queryStream;
       std::stringstream gtStream;
-
       optimizer.generatePseudoGroundTruth(querySize, maxEpsilon, queryStream, gtStream);
-
       std::map<float, double> map;
       {
 	float interval = 0.05;
@@ -1563,7 +1515,7 @@ namespace NGT {
 	    if (accuracy - prev < 0.02) {
 	      interval *= 2.0;
 	    } else if (accuracy - prev > 0.05 && interval > 0.0001) {
-
+	
 	      epsilon -= interval;
 	      interval /= 2.0;
 	      accuracy = prev;
diff --git a/lib/NGT/PrimitiveComparator.h b/lib/NGT/PrimitiveComparator.h
index 1c3bbf5..669fead 100644
--- a/lib/NGT/PrimitiveComparator.h
+++ b/lib/NGT/PrimitiveComparator.h
@@ -310,8 +310,105 @@ namespace NGT {
       return sqrt(s);
     }
 
+    inline static double compareL2(const quint8 *a, const quint8 *b, size_t size) {
 
+      auto *u8a = reinterpret_cast<const uint8_t*>(a);
+      auto *u8b = reinterpret_cast<const uint8_t*>(b);
+      // Sum of squared byte differences, consumed in groups of 64/32/16 elements depending on the build.
+      const unsigned char *last = u8a + size;
+#if defined(NGT_AVX512)
+      __m512i sum512 = _mm512_setzero_si512();
+      {
+	const unsigned char *lastgroup = last - 63;
+	while (u8a < lastgroup) {
+	  __m512i mu8a = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(u8a));
+	  __m512i mu8b = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(u8b));
+	  __mmask64 m = _mm512_cmplt_epu8_mask(mu8a, mu8b); // lanes where a < b
+	  __m512i x = _mm512_add_epi8(_mm512_maskz_subs_epu8(m, mu8b, mu8a), // b - a where a < b ...
+				      _mm512_maskz_subs_epu8(~m, mu8a, mu8b)); // ... a - b elsewhere, i.e. |a - b|
+	  __m512i xi16 = _mm512_cvtepu8_epi16(_mm512_extracti32x8_epi32(x,0)); // widen the low 32 bytes to 16 bit
+	  sum512 = _mm512_add_epi32(sum512, _mm512_madd_epi16(xi16, xi16)); // square and accumulate as 32 bit
+	  xi16 = _mm512_cvtepu8_epi16(_mm512_extracti32x8_epi32(x,1)); // same for the high 32 bytes
+	  sum512 = _mm512_add_epi32(sum512, _mm512_madd_epi16(xi16, xi16));
+	  u8a += 64;
+	  u8b += 64;
+	}
+      }
+      {
+	const unsigned char *lastgroup = last - 31;
+	while (u8a < lastgroup) {
+	  __m256i mu8a = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(u8a));
+	  __m256i mu8b = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(u8b));
+	  __mmask32 m = _mm256_cmplt_epu8_mask(mu8a, mu8b);
+	  __m256i x = _mm256_add_epi8(_mm256_maskz_subs_epu8(m, mu8b, mu8a),
+				      _mm256_maskz_subs_epu8(~m, mu8a, mu8b));
+	  __m512i xi16 = _mm512_cvtepu8_epi16(x);
+	  sum512 = _mm512_add_epi32(sum512, _mm512_madd_epi16(xi16, xi16));
+	  u8a += 32;
+	  u8b += 32;
+	}
+      }
+      __m256i sum256 = _mm256_add_epi32(_mm512_extracti32x8_epi32(sum512, 0), _mm512_extracti32x8_epi32(sum512, 1)); // fold 512 -> 256 bit
+#elif defined(NGT_AVX2)
+      __m256i sum256 = _mm256_setzero_si256();
+      {
+	const unsigned char *lastgroup = last - 31;
+	while (u8a < lastgroup) { // 32 elements per iteration, handled as two 16-element halves
+	  __m256i x1 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)u8a));
+	  __m256i x2 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)u8b));
+	  __m256i xi16 = _mm256_subs_epi16(x1, x2);
+	  sum256 = _mm256_add_epi32(sum256, _mm256_madd_epi16(xi16, xi16));
+	  u8a += 16;
+	  u8b += 16;
+	  x1 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)u8a));
+	  x2 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)u8b));
+	  xi16 = _mm256_subs_epi16(x1, x2);
+	  sum256 = _mm256_add_epi32(sum256, _mm256_madd_epi16(xi16, xi16));
+	  u8a += 16;
+	  u8b += 16;
+	}
+      }
+#else
+      __m256i sum256 = _mm256_setzero_si256(); // NOTE(review): the code below still uses AVX2 intrinsics in this branch -- confirm build flags
+#endif
+      {
+	const unsigned char *lastgroup = last - 15;
+	// Tail: 16 elements per iteration; a remainder shorter than 16 is never read (presumably size is padded -- confirm).
+	while (u8a < lastgroup) {
+	  __m256i x1 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)u8a));
+	  __m256i x2 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)u8b));
+	  __m256i xi16 = _mm256_subs_epi16(x1, x2);
+	  sum256 = _mm256_add_epi32(sum256, _mm256_madd_epi16(xi16, xi16));
+	  u8a += 16;
+	  u8b += 16;
+	}
+      }
+      // Horizontal reduction: two hadd passes leave each 128-bit half's total in elements 0 and 4.
+      const __m256i value0 = _mm256_set1_epi32(0);
+      __m256i tmp1 = _mm256_hadd_epi32(sum256, value0);
+      __m256i tmp2 = _mm256_hadd_epi32(tmp1, value0);
+      double s = _mm256_extract_epi32(tmp2, 0) + _mm256_extract_epi32(tmp2, 4);
+      return s; // squared distance: no square root is applied in this overload
+
+    }
 #endif
+    inline static double compareL2(const qsint8 *a, const qsint8 *b, size_t size) {
+      // Scalar L2 distance between two quantized vectors, read as raw signed bytes.
+      const auto *lhs = reinterpret_cast<const int8_t*>(a);
+      const auto *rhs = reinterpret_cast<const int8_t*>(b);
+      double squaredSum = 0.0;
+      for (size_t i = 0; i < size; i++) {
+	const double diff = static_cast<double>(lhs[i]) - static_cast<double>(rhs[i]);
+	squaredSum += diff * diff;
+      }
+      // The square root of the accumulated squared differences is returned.
+      return sqrt(squaredSum);
+    }
+
+    inline static double compareL2(const qsint8 *a, const quint8 *b, size_t size) {
+      NGTThrowException("Not supported.");
+      return 0.00;
+    }
 
     template <typename OBJECT_TYPE>
     inline static double compareNormalizedL2(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) {
@@ -323,8 +420,6 @@ namespace NGT {
       }
     }
 
-
-#if defined(NGT_NO_AVX)
     template <typename OBJECT_TYPE, typename COMPARE_TYPE>
     static double compareL1(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) {
       const OBJECT_TYPE *last = a + size;
@@ -347,10 +442,15 @@ namespace NGT {
       return d;
     }
 
+#if defined(NGT_NO_AVX)
     inline static double compareL1(const uint8_t *a, const uint8_t *b, size_t size) {
       return compareL1<uint8_t, int>(a, b, size);
     }
 
+    inline static double compareL1(const int8_t *a, const int8_t *b, size_t size) {
+      return compareL1<int8_t, int>(a, b, size);
+    }
+
     inline static double compareL1(const float *a, const float *b, size_t size) {
       return compareL1<float, double>(a, b, size);
     }
@@ -421,8 +521,8 @@ namespace NGT {
       const unsigned char *lastgroup = last - 7;
       const __m128i zero = _mm_setzero_si128();
       while (a < lastgroup) {
-	__m128i x1 = _mm_cvtepu8_epi16(*reinterpret_cast<__m128i const*>(a));
-	__m128i x2 = _mm_cvtepu8_epi16(*reinterpret_cast<__m128i const*>(b));
+	__m128i x1 = _mm_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)a));
+	__m128i x2 = _mm_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)b));
 	x1 = _mm_subs_epi16(x1, x2);
 	x1 = _mm_sign_epi16(x1, x1);
 	sum = _mm_add_ps(sum, _mm_cvtepi32_ps(_mm_unpacklo_epi16(x1, zero)));
@@ -439,6 +539,12 @@ namespace NGT {
       }
       return s;
     }
+    inline static double compareL1(const int8_t *a, const int8_t *b, size_t size) {
+      return compareL1<int8_t, int>(a, b, size);  // no SIMD variant here: uses the generic template with int as COMPARE_TYPE
+    }
+    inline static double compareL1(const qsint8 *a, const qsint8 *b, size_t size) {
+      return compareL1(reinterpret_cast<const int8_t*>(a), reinterpret_cast<const int8_t*>(b), size);  // delegates to the int8_t overload on the reinterpreted buffers
+    }
 #endif
 
 #if defined(NGT_NO_AVX) || !defined(__POPCNT__)
@@ -592,6 +698,9 @@ namespace NGT {
     }
 #endif
 
+    inline static double compareSparseJaccardDistance(const qsint8 *a, const qsint8 *b, size_t size) {
+      NGTThrowException("Not supported.");  // rejected for qsint8; NOTE(review): no return follows - relies on the macro always throwing
+    }
     inline static double compareSparseJaccardDistance(const float *a, const float *b, size_t size) {
       size_t loca = 0;
       size_t locb = 0;
@@ -721,13 +830,158 @@ namespace NGT {
     }
 #endif
 
-    inline static double compareDotProduct(const unsigned char *a, const unsigned char *b, size_t size) {
+    inline static double compareDotProduct(const uint8_t *a, const uint8_t *b, size_t size) {
       double sum = 0.0;
       for (size_t loc = 0; loc < size; loc++) {
 	sum += static_cast<double>(a[loc]) * static_cast<double>(b[loc]);
       }
       return sum;
     }
+    inline static double compareDotProduct(const int8_t *a, const int8_t *b, size_t size) {
+      // Dot product of two int8_t vectors of `size` elements, returned as a double.
+#if defined(NGT_NO_AVX)
+      double sum = 0.0;
+      for (size_t loc = 0; loc < size; loc++) {
+	sum += static_cast<double>(a[loc]) * static_cast<double>(b[loc]);
+      }
+      return sum;
+#else
+      const auto *last = a + size;
+#if defined(NGT_AVX512) || defined(NGT_AVX2)
+#if defined(NGT_AVX512)
+      __m512i sum512 = _mm512_setzero_si512();
+      // `last - a >= N` (not `a < last - (N - 1)`) so no pointer before the buffer is formed when size < N.
+      while (last - a >= 64) {
+	__m512i ma = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(a));
+	__m512i mb = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(b));
+	// Sign-extend each half to 16 bits; madd multiplies and pair-sums into 32-bit lanes.
+	__m512i malo = _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(ma, 0));
+	__m512i mahi = _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(ma, 1));
+	__m512i mblo = _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(mb, 0));
+	__m512i mbhi = _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(mb, 1));
+	sum512 = _mm512_add_epi32(sum512, _mm512_madd_epi16(malo, mblo));
+	sum512 = _mm512_add_epi32(sum512, _mm512_madd_epi16(mahi, mbhi));
+	a += 64;
+	b += 64;
+      }
+      __m256i sum256 = _mm256_add_epi32(_mm512_extracti64x4_epi64(sum512, 0), _mm512_extracti64x4_epi64(sum512, 1));
+#else
+      __m256i sum256 = _mm256_setzero_si256();
+#endif
+      while (last - a >= 32) {
+	__m256i ma = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(a));
+	__m256i mb = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(b));
+	__m256i malo = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(ma, 0));
+	__m256i mahi = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(ma, 1));
+	__m256i mblo = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(mb, 0));
+	__m256i mbhi = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(mb, 1));
+	sum256 = _mm256_add_epi32(sum256, _mm256_madd_epi16(malo, mblo));
+	sum256 = _mm256_add_epi32(sum256, _mm256_madd_epi16(mahi, mbhi));
+	a += 32;
+	b += 32;
+      }
+      __m128i sum128 = _mm_add_epi32(_mm256_extracti128_si256(sum256, 0), _mm256_extracti128_si256(sum256, 1));
+#else
+      __m128i sum128 = _mm_setzero_si128();  // SSE-only build: no wider stage ran, start from zero
+#endif
+      while (last - a >= 16) {
+	__m128i ma = _mm_loadu_si128(reinterpret_cast<const __m128i*>(a));
+	__m128i mb = _mm_loadu_si128(reinterpret_cast<const __m128i*>(b));
+	__m128i malo = _mm_cvtepi8_epi16(ma);
+	__m128i mahi = _mm_cvtepi8_epi16(_mm_bsrli_si128(ma, 8));
+	__m128i mblo = _mm_cvtepi8_epi16(mb);
+	__m128i mbhi = _mm_cvtepi8_epi16(_mm_bsrli_si128(mb, 8));
+	sum128 = _mm_add_epi32(sum128, _mm_madd_epi16(malo, mblo));
+	sum128 = _mm_add_epi32(sum128, _mm_madd_epi16(mahi, mbhi));
+	a += 16;
+	b += 16;
+      }
+      // hadd against zero leaves lane0+lane1 in element 0 and lane2+lane3 in element 1.
+      __m128i tmp = _mm_hadd_epi32(sum128, _mm_set1_epi32(0));
+      double sum = _mm_extract_epi32(tmp, 0) + _mm_extract_epi32(tmp, 1);
+      // Scalar tail for the size % 16 elements no SIMD block covered, matching the NGT_NO_AVX result.
+      for (; a < last; a++, b++) {
+	sum += static_cast<double>(*a) * static_cast<double>(*b);
+      }
+      return sum;
+#endif
+    }
+    inline static double compareDotProduct(const int8_t *a, const uint8_t *b, size_t size) {
+      // Dot product of a signed (a) and an unsigned (b) 8-bit vector, returned as a double.
+#if defined(__AVX512VNNI__)
+      const auto *last = a + size;
+#if defined(NGT_AVX512)
+      __m512i sum512 = _mm512_setzero_si512();
+      // Three 64-byte steps per iteration; `last - a >= N` never forms a pointer before the buffer.
+      while (last - a >= 192) {
+	__m512i ma = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(a));
+	__m512i mb = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(b));
+	sum512 = _mm512_dpbusd_epi32(sum512, mb, ma);  // dpbusd takes the unsigned operand first
+	a += 64;
+	b += 64;
+	ma = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(a));
+	mb = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(b));
+	sum512 = _mm512_dpbusd_epi32(sum512, mb, ma);
+	a += 64;
+	b += 64;
+	ma = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(a));
+	mb = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(b));
+	sum512 = _mm512_dpbusd_epi32(sum512, mb, ma);
+	a += 64;
+	b += 64;
+      }
+      __m256i sum256 = _mm256_add_epi32(_mm512_extracti32x8_epi32(sum512, 0),
+					_mm512_extracti32x8_epi32(sum512, 1));
+      __m128i sum128 = _mm_add_epi32(_mm256_extracti32x4_epi32(sum256, 0),
+				     _mm256_extracti32x4_epi32(sum256, 1));
+#elif defined(NGT_AVX2)
+      __m256i sum256 = _mm256_setzero_si256();
+      while (last - a >= 32) {
+	__m256i ma = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(a));
+	__m256i mb = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(b));
+	sum256 = _mm256_dpbusd_epi32(sum256, mb, ma);
+	a += 32;
+	b += 32;
+      }
+      __m128i sum128 = _mm_add_epi32(_mm256_extracti32x4_epi32(sum256, 0),
+				     _mm256_extracti32x4_epi32(sum256, 1));
+#else
+      __m128i sum128 = _mm_setzero_si128();
+#endif
+      // Remaining 16-byte blocks, including whatever the wider loop above left unprocessed.
+      while (last - a >= 16) {
+	__m128i ma = _mm_loadu_si128(reinterpret_cast<const __m128i*>(a));
+	__m128i mb = _mm_loadu_si128(reinterpret_cast<const __m128i*>(b));
+	sum128 = _mm_dpbusd_epi32(sum128, mb, ma);
+	a += 16;
+	b += 16;
+      }
+      // hadd against zero leaves lane0+lane1 in element 0 and lane2+lane3 in element 1.
+      __m128i tmp = _mm_hadd_epi32(sum128, _mm_set1_epi32(0));
+      double sum = _mm_extract_epi32(tmp, 0) + _mm_extract_epi32(tmp, 1);
+      // Scalar tail for the size % 16 elements no SIMD block covered, matching the fallback below.
+      for (; a < last; a++, b++) {
+	sum += static_cast<double>(*a) * static_cast<double>(*b);
+      }
+#else
+      // No VNNI support at compile time: plain scalar loop over every element.
+      double sum = 0.0;
+      for (size_t loc = 0; loc < size; loc++) {
+	sum += static_cast<double>(a[loc]) * static_cast<double>(b[loc]);
+      }
+#endif
+      return sum;
+    }
+    inline static double compareDotProduct(const quint8 *a, const quint8 *b, size_t size) {
+      return compareDotProduct(reinterpret_cast<const uint8_t*>(a), reinterpret_cast<const uint8_t*>(b), size);  // delegates to the uint8_t overload on the reinterpreted buffers
+    }
+    inline static double compareDotProduct(const qsint8 *a, const qsint8 *b, size_t size) {
+      // Forward to the int8_t overload; both qsint8 buffers are read as raw int8_t values.
+      return compareDotProduct(reinterpret_cast<const int8_t*>(a), reinterpret_cast<const int8_t*>(b), size);
+    }
+    inline static double compareDotProduct(const qsint8 *a, const quint8 *b, size_t size) {
+      return compareDotProduct(reinterpret_cast<const int8_t*>(a), reinterpret_cast<const uint8_t*>(b), size);  // delegates to the mixed int8_t/uint8_t overload
+    }
     inline static double compareCosine(const float *a, const float *b, size_t size) {
 
       const float *last = a + size;
@@ -896,7 +1150,38 @@ namespace NGT {
       return cosine;
     }
 
+    inline static double compareCosine(const qsint8 *a, const qsint8 *b, size_t size) {  // NOTE(review): qsint8 is read as int8_t by compareL2/compareDotProduct but as uint8_t here - confirm negative values are handled as intended
+      return compareCosine(reinterpret_cast<const uint8_t*>(a), reinterpret_cast<const uint8_t*>(b), size);  // delegates to the uint8_t overload
+    }
 
+    inline static double compareNormalizedCosineSimilarity(const float *a, const float *b, size_t size) {
+      const auto gap = 1.0 - compareDotProduct(a, b, size);  // one minus the dot product
+      return gap < 0.0 ? -gap : gap;                         // negated when negative, so the result is a magnitude
+    }
+    inline static double compareNormalizedCosineSimilarity(const float16 *a, const float16 *b, size_t size) {
+      const auto gap = 1.0 - compareDotProduct(a, b, size);  // one minus the dot product
+      return gap < 0.0 ? -gap : gap;                         // negated when negative, so the result is a magnitude
+    }
+#ifdef NGT_BFLOAT
+    inline static double compareNormalizedCosineSimilarity(const bfloat16 *a, const bfloat16 *b, size_t size) {
+      const auto gap = 1.0 - compareDotProduct(a, b, size);  // one minus the dot product
+      return gap < 0.0 ? -gap : gap;                         // negated when negative, so the result is a magnitude
+    }
+#endif
+    inline static double compareNormalizedCosineSimilarity(const uint8_t *a, const uint8_t *b, size_t size) {
+      const auto gap = 1.0 - compareDotProduct(a, b, size);  // one minus the dot product
+      return gap < 0.0 ? -gap : gap;                         // negated when negative, so the result is a magnitude
+    }
+    inline static double compareNormalizedCosineSimilarity(const qsint8 *a, const qsint8 *b, size_t size) {
+      // Reference bound of 127 * 127 per element, scaled by size like the quint8 overload and NormalizedCosineSimilarityQsint8.
+      float max = 127.0 * 127.0 * size;
+      return max - compareDotProduct(a, b, size);
+    }
+    inline static double compareNormalizedCosineSimilarity(const quint8 *a, const quint8 *b, size_t size) {
+      // Distance is a reference bound of 255 * 255 per element minus the dot product.
+      const float ceiling = 255.0 * 255.0 * size;
+      return ceiling - compareDotProduct(a, b, size);
+    }
 #endif    // #if defined(NGT_NO_AVX)
 
     template <typename OBJECT_TYPE>
@@ -954,12 +1239,6 @@ namespace NGT {
       return v < 0.0 ? -v : v;
     }
 
-    template <typename OBJECT_TYPE>
-    inline static double compareNormalizedCosineSimilarity(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) {
-      auto v = 1.0 - compareDotProduct(a, b, size);
-      return v < 0.0 ? -v : v;
-    }
-
     class L1Uint8 {
     public:
       inline static double compare(const void *a, const void *b, size_t size) {
@@ -1208,6 +1487,88 @@ namespace NGT {
 #endif
 
 
+
+    class SparseJaccardQsint8 {  // comparator entry for sparse Jaccard on qsint8 objects; always rejects
+    public:
+      inline static double compare(const void *a, const void *b, size_t size) {
+	NGTThrowException("Not supported.");  // NOTE(review): no return follows - relies on the macro always throwing
+      }
+    };
+
+    class L2Qsint8 {  // comparator entry: L2 distance between two qsint8 objects
+    public:
+      inline static double compare(const void *a, const void *b, size_t size) {
+	return PrimitiveComparator::compareL2(static_cast<const qsint8*>(a), static_cast<const qsint8*>(b), size);  // both operands viewed as qsint8
+      }
+    };
+
+    class NormalizedL2Qsint8 {  // comparator entry: normalized L2 between two qsint8 objects
+    public:
+      inline static double compare(const void *a, const void *b, size_t size) {
+	return PrimitiveComparator::compareNormalizedL2(static_cast<const qsint8*>(a), static_cast<const qsint8*>(b), size);  // both operands viewed as qsint8
+      }
+    };
+
+    class L1Qsint8 {  // comparator entry for L1 on qsint8 objects; always rejects
+    public:
+      inline static double compare(const void *a, const void *b, size_t size) {
+	NGTThrowException("Not supported.");  // NOTE(review): no return follows - relies on the macro always throwing
+      }
+    };
+
+    class CosineSimilarityQsint8 {  // comparator entry for cosine similarity on qsint8 objects; always rejects
+    public:
+      inline static double compare(const void *a, const void *b, size_t size) {
+	NGTThrowException("Not supported.");  // NOTE(review): no return follows - relies on the macro always throwing
+      }
+    };
+
+    class AngleQsint8 {  // comparator entry for angle distance on qsint8 objects; always rejects
+    public:
+      inline static double compare(const void *a, const void *b, size_t size) {
+	NGTThrowException("Not supported.");  // NOTE(review): no return follows - relies on the macro always throwing
+      }
+    };
+
+    class NormalizedAngleQsint8 {  // comparator entry for normalized angle on qsint8 objects; always rejects
+    public:
+      inline static double compare(const void *a, const void *b, size_t size) {
+	NGTThrowException("Not supported.");  // NOTE(review): no return follows - relies on the macro always throwing
+      }
+    };
+
+    // added by Nyapicom
+    class PoincareQsint8 {  // comparator entry for Poincare distance on qsint8 objects; always rejects
+    public:
+      inline static double compare(const void *a, const void *b, size_t size) {
+	NGTThrowException("Not supported.");  // NOTE(review): no return follows - relies on the macro always throwing
+      }
+    };
+
+    // added by Nyapicom
+    class LorentzQsint8 {  // comparator entry for Lorentz distance on qsint8 objects; always rejects
+    public:
+      inline static double compare(const void *a, const void *b, size_t size) {
+	NGTThrowException("Not supported.");  // NOTE(review): no return follows - relies on the macro always throwing
+      }
+    };
+
+    class InnerProductQsint8 {  // comparator entry: inner-product distance with a as qsint8 and b as quint8
+    public:
+      inline static double compare(const void *a, const void *b, size_t size) {
+	const auto dot = PrimitiveComparator::compareDotProduct(static_cast<const qsint8*>(a), static_cast<const quint8*>(b), size);  // NOTE(review): b is viewed as quint8, unlike the other Qsint8 comparators - confirm this is intended
+	return 127.0 * 127.0 * size - dot;  // reference bound of 127 * 127 per element minus the dot product
+      }
+    };
+
+    class NormalizedCosineSimilarityQsint8 {  // comparator entry: normalized cosine distance between two qsint8 objects
+    public:
+      inline static double compare(const void *a, const void *b, size_t size) {
+	const float ceiling = 127.0 * 127.0 * size;  // reference bound of 127 * 127 per element, held as float
+	const auto dot = PrimitiveComparator::compareDotProduct(static_cast<const qsint8*>(a), static_cast<const qsint8*>(b), size);
+	return ceiling - dot;
+      }
+    };
 };
 
 
diff --git a/lib/NGT/SharedMemoryAllocator.h b/lib/NGT/SharedMemoryAllocator.h
index ccc4b1d..e7441a3 100644
--- a/lib/NGT/SharedMemoryAllocator.h
+++ b/lib/NGT/SharedMemoryAllocator.h
@@ -62,7 +62,7 @@ class SharedMemoryAllocator {
     if(!isValid){
       return NULL;
     }
-    off_t file_offset = mmanager->alloc(size, true);
+    off_t file_offset = mmanager->alloc(size, true);	
     if (file_offset == -1) {
       std::cerr << "Fatal Error: Allocating memory size is too big for this settings." << std::endl;
       std::cerr << "             Max allocation size should be enlarged." << std::endl;
diff --git a/lib/NGT/defines.h.in b/lib/NGT/defines.h.in
index 5ee481f..4bec75b 100644
--- a/lib/NGT/defines.h.in
+++ b/lib/NGT/defines.h.in
@@ -28,6 +28,8 @@
 #cmakedefine NGTQG_NO_ROTATION
 #cmakedefine NGT_BFLOAT_DISABLED		// not use bfloat
 #cmakedefine NGT_BFLOAT_ENABLED
+#cmakedefine NGT_GRAPH_COMPACT_READ_ONLY_GRAPH
+#cmakedefine NGT_ENABLE_TIME_SEED_FOR_RANDOM
 // End of cmake defines
 
 //////////////////////////////////////////////////////////////////////////
@@ -63,7 +65,7 @@
  #define	NGT_QBG_DISABLED
 #endif
 
-#define NGT_INNER_PRODUCT
+#define NGT_REFINEMENT
 
 
 #if defined(NGT_AVX_DISABLED) 
@@ -72,6 +74,8 @@
 #undef NGT_AVX512
 #else
 #if defined(__AVX512F__) && defined(__AVX512DQ__)
+#if defined(__AVX512VNNI__)
+#endif
 #define NGT_AVX512
 #elif defined(__AVX2__)
 #define NGT_AVX2
diff --git a/lib/NGT/half.hpp b/lib/NGT/half.hpp
index 4f4030f..bc5798d 100644
--- a/lib/NGT/half.hpp
+++ b/lib/NGT/half.hpp
@@ -16,8 +16,8 @@
 
 // Version 2.2.0
 
-/// \file
-/// Main header file for half-precision functionality.
+
+
 
 #ifndef HALF_HALF_HPP
 #define HALF_HALF_HPP
@@ -269,12 +269,12 @@
 
 
 #ifndef HALF_ENABLE_F16C_INTRINSICS
-	/// Enable F16C intruction set intrinsics.
-	/// Defining this to 1 enables the use of [F16C compiler intrinsics](https://en.wikipedia.org/wiki/F16C) for converting between
-	/// half-precision and single-precision values which may result in improved performance. This will not perform additional checks
-	/// for support of the F16C instruction set, so an appropriate target platform is required when enabling this feature.
-	///
-	/// Unless predefined it will be enabled automatically when the `__F16C__` symbol is defined, which some compilers do on supporting platforms.
+	
+	
+	
+	
+	
+	
 	#define HALF_ENABLE_F16C_INTRINSICS __F16C__
 #endif
 #if HALF_ENABLE_F16C_INTRINSICS
@@ -282,117 +282,117 @@
 #endif
 
 #ifdef HALF_DOXYGEN_ONLY
-/// Type for internal floating-point computations.
-/// This can be predefined to a built-in floating-point type (`float`, `double` or `long double`) to override the internal
-/// half-precision implementation to use this type for computing arithmetic operations and mathematical function (if available).
-/// This can result in improved performance for arithmetic operators and mathematical functions but might cause results to
-/// deviate from the specified half-precision rounding mode and inhibits proper detection of half-precision exceptions.
+
+
+
+
+
 #define HALF_ARITHMETIC_TYPE (undefined)
 
-/// Enable internal exception flags.
-/// Defining this to 1 causes operations on half-precision values to raise internal floating-point exception flags according to
-/// the IEEE 754 standard. These can then be cleared and checked with clearexcept(), testexcept().
+
+
+
 #define HALF_ERRHANDLING_FLAGS	0
 
-/// Enable exception propagation to `errno`.
-/// Defining this to 1 causes operations on half-precision values to propagate floating-point exceptions to
-/// [errno](https://en.cppreference.com/w/cpp/error/errno) from `<cerrno>`. Specifically this will propagate domain errors as
-/// [EDOM](https://en.cppreference.com/w/cpp/error/errno_macros) and pole, overflow and underflow errors as
-/// [ERANGE](https://en.cppreference.com/w/cpp/error/errno_macros). Inexact errors won't be propagated.
+
+
+
+
+
 #define HALF_ERRHANDLING_ERRNO	0
 
-/// Enable exception propagation to built-in floating-point platform.
-/// Defining this to 1 causes operations on half-precision values to propagate floating-point exceptions to the built-in
-/// single- and double-precision implementation's exception flags using the
-/// [C++11 floating-point environment control](https://en.cppreference.com/w/cpp/numeric/fenv) from `<cfenv>`. However, this
-/// does not work in reverse and single- or double-precision exceptions will not raise the corresponding half-precision
-/// exception flags, nor will explicitly clearing flags clear the corresponding built-in flags.
+
+
+
+
+
+
 #define HALF_ERRHANDLING_FENV	0
 
-/// Throw C++ exception on domain errors.
-/// Defining this to a string literal causes operations on half-precision values to throw a
-/// [std::domain_error](https://en.cppreference.com/w/cpp/error/domain_error) with the specified message on domain errors.
+
+
+
 #define HALF_ERRHANDLING_THROW_INVALID		(undefined)
 
-/// Throw C++ exception on pole errors.
-/// Defining this to a string literal causes operations on half-precision values to throw a
-/// [std::domain_error](https://en.cppreference.com/w/cpp/error/domain_error) with the specified message on pole errors.
+
+
+
 #define HALF_ERRHANDLING_THROW_DIVBYZERO	(undefined)
 
-/// Throw C++ exception on overflow errors.
-/// Defining this to a string literal causes operations on half-precision values to throw a
-/// [std::overflow_error](https://en.cppreference.com/w/cpp/error/overflow_error) with the specified message on overflows.
+
+
+
 #define HALF_ERRHANDLING_THROW_OVERFLOW		(undefined)
 
-/// Throw C++ exception on underflow errors.
-/// Defining this to a string literal causes operations on half-precision values to throw a
-/// [std::underflow_error](https://en.cppreference.com/w/cpp/error/underflow_error) with the specified message on underflows.
+
+
+
 #define HALF_ERRHANDLING_THROW_UNDERFLOW	(undefined)
 
-/// Throw C++ exception on rounding errors.
-/// Defining this to 1 causes operations on half-precision values to throw a
-/// [std::range_error](https://en.cppreference.com/w/cpp/error/range_error) with the specified message on general rounding errors.
+
+
+
 #define HALF_ERRHANDLING_THROW_INEXACT		(undefined)
 #endif
 
 #ifndef HALF_ERRHANDLING_OVERFLOW_TO_INEXACT
-/// Raise INEXACT exception on overflow.
-/// Defining this to 1 (default) causes overflow errors to automatically raise inexact exceptions in addition.
-/// These will be raised after any possible handling of the underflow exception.
+
+
+
 #define HALF_ERRHANDLING_OVERFLOW_TO_INEXACT	1
 #endif
 
 #ifndef HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT
-/// Raise INEXACT exception on underflow.
-/// Defining this to 1 (default) causes underflow errors to automatically raise inexact exceptions in addition.
-/// These will be raised after any possible handling of the underflow exception.
-///
-/// **Note:** This will actually cause underflow (and the accompanying inexact) exceptions to be raised *only* when the result
-/// is inexact, while if disabled bare underflow errors will be raised for *any* (possibly exact) subnormal result.
+
+
+
+
+
+
 #define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT	1
 #endif
 
-/// Default rounding mode.
-/// This specifies the rounding mode used for all conversions between [half](\ref half_float::half)s and more precise types
-/// (unless using half_cast() and specifying the rounding mode directly) as well as in arithmetic operations and mathematical
-/// functions. It can be redefined (before including half.hpp) to one of the standard rounding modes using their respective
-/// constants or the equivalent values of
-/// [std::float_round_style](https://en.cppreference.com/w/cpp/types/numeric_limits/float_round_style):
-///
-/// `std::float_round_style`         | value | rounding
-/// ---------------------------------|-------|-------------------------
-/// `std::round_indeterminate`       | -1    | fastest
-/// `std::round_toward_zero`         | 0     | toward zero
-/// `std::round_to_nearest`          | 1     | to nearest (default)
-/// `std::round_toward_infinity`     | 2     | toward positive infinity
-/// `std::round_toward_neg_infinity` | 3     | toward negative infinity
-///
-/// By default this is set to `1` (`std::round_to_nearest`), which rounds results to the nearest representable value. It can even
-/// be set to [std::numeric_limits<float>::round_style](https://en.cppreference.com/w/cpp/types/numeric_limits/round_style) to synchronize
-/// the rounding mode with that of the built-in single-precision implementation (which is likely `std::round_to_nearest`, though).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 #ifndef HALF_ROUND_STYLE
 	#define HALF_ROUND_STYLE	1		// = std::round_to_nearest
 #endif
 
-/// Value signaling overflow.
-/// In correspondence with `HUGE_VAL[F|L]` from `<cmath>` this symbol expands to a positive value signaling the overflow of an
-/// operation, in particular it just evaluates to positive infinity.
-///
-/// **See also:** Documentation for [HUGE_VAL](https://en.cppreference.com/w/cpp/numeric/math/HUGE_VAL)
+
+
+
+
+
 #define HUGE_VALH	std::numeric_limits<half_float::half>::infinity()
 
-/// Fast half-precision fma function.
-/// This symbol is defined if the fma() function generally executes as fast as, or faster than, a separate
-/// half-precision multiplication followed by an addition, which is always the case.
-///
-/// **See also:** Documentation for [FP_FAST_FMA](https://en.cppreference.com/w/cpp/numeric/math/fma)
+
+
+
+
+
 #define FP_FAST_FMAH	1
 
-///	Half rounding mode.
-/// In correspondence with `FLT_ROUNDS` from `<cfloat>` this symbol expands to the rounding mode used for
-/// half-precision operations. It is an alias for [HALF_ROUND_STYLE](\ref HALF_ROUND_STYLE).
-///
-/// **See also:** Documentation for [FLT_ROUNDS](https://en.cppreference.com/w/cpp/types/climits/FLT_ROUNDS)
+
+
+
+
+
 #define HLF_ROUNDS	HALF_ROUND_STYLE
 
 #ifndef FP_ILOGB0
@@ -427,51 +427,51 @@
 #endif
 
 
-/// Main namespace for half-precision functionality.
-/// This namespace contains all the functionality provided by the library.
+
+
 namespace half_float
 {
 	class half;
 
 #if HALF_ENABLE_CPP11_USER_LITERALS
-	/// Library-defined half-precision literals.
-	/// Import this namespace to enable half-precision floating-point literals:
-	/// ~~~~{.cpp}
-	/// using namespace half_float::literal;
-	/// half_float::half = 4.2_h;
-	/// ~~~~
+	
+	
+	
+	
+	
+	
 	namespace literal
 	{
 		half operator "" _h(long double);
 	}
 #endif
 
-	/// \internal
-	/// \brief Implementation details.
+	
+	
 	namespace detail
 	{
 	#if HALF_ENABLE_CPP11_TYPE_TRAITS
-		/// Conditional type.
+		
 		template<bool B,typename T,typename F> struct conditional : std::conditional<B,T,F> {};
 
-		/// Helper for tag dispatching.
+		
 		template<bool B> struct bool_type : std::integral_constant<bool,B> {};
 		using std::true_type;
 		using std::false_type;
 
-		/// Type traits for floating-point types.
+		
 		template<typename T> struct is_float : std::is_floating_point<T> {};
 	#else
-		/// Conditional type.
+		
 		template<bool,typename T,typename> struct conditional { typedef T type; };
 		template<typename T,typename F> struct conditional<false,T,F> { typedef F type; };
 
-		/// Helper for tag dispatching.
+		
 		template<bool> struct bool_type {};
 		typedef bool_type<true> true_type;
 		typedef bool_type<false> false_type;
 
-		/// Type traits for floating-point types.
+		
 		template<typename> struct is_float : false_type {};
 		template<typename T> struct is_float<const T> : is_float<T> {};
 		template<typename T> struct is_float<volatile T> : is_float<T> {};
@@ -481,68 +481,68 @@ namespace half_float
 		template<> struct is_float<long double> : true_type {};
 	#endif
 
-		/// Type traits for floating-point bits.
+		
 		template<typename T> struct bits { typedef unsigned char type; };
 		template<typename T> struct bits<const T> : bits<T> {};
 		template<typename T> struct bits<volatile T> : bits<T> {};
 		template<typename T> struct bits<const volatile T> : bits<T> {};
 
 	#if HALF_ENABLE_CPP11_CSTDINT
-		/// Unsigned integer of (at least) 16 bits width.
+		
 		typedef std::uint_least16_t uint16;
 
-		/// Fastest unsigned integer of (at least) 32 bits width.
+		
 		typedef std::uint_fast32_t uint32;
 
-		/// Fastest signed integer of (at least) 32 bits width.
+		
 		typedef std::int_fast32_t int32;
 
-		/// Unsigned integer of (at least) 32 bits width.
+		
 		template<> struct bits<float> { typedef std::uint_least32_t type; };
 
-		/// Unsigned integer of (at least) 64 bits width.
+		
 		template<> struct bits<double> { typedef std::uint_least64_t type; };
 	#else
-		/// Unsigned integer of (at least) 16 bits width.
+		
 		typedef unsigned short uint16;
 
-		/// Fastest unsigned integer of (at least) 32 bits width.
+		
 		typedef unsigned long uint32;
 
-		/// Fastest unsigned integer of (at least) 32 bits width.
+		
 		typedef long int32;
 
-		/// Unsigned integer of (at least) 32 bits width.
+		
 		template<> struct bits<float> : conditional<std::numeric_limits<unsigned int>::digits>=32,unsigned int,unsigned long> {};
 
 		#if HALF_ENABLE_CPP11_LONG_LONG
-			/// Unsigned integer of (at least) 64 bits width.
+			
 			template<> struct bits<double> : conditional<std::numeric_limits<unsigned long>::digits>=64,unsigned long,unsigned long long> {};
 		#else
-			/// Unsigned integer of (at least) 64 bits width.
+			
 			template<> struct bits<double> { typedef unsigned long type; };
 		#endif
 	#endif
 
 	#ifdef HALF_ARITHMETIC_TYPE
-		/// Type to use for arithmetic computations and mathematic functions internally.
+		
 		typedef HALF_ARITHMETIC_TYPE internal_t;
 	#endif
 
-		/// Tag type for binary construction.
+		
 		struct binary_t {};
 
-		/// Tag for binary construction.
+		
 		HALF_CONSTEXPR_CONST binary_t binary = binary_t();
 
-		/// \name Implementation defined classification and arithmetic
-		/// \{
+		
+		
 
-		/// Check for infinity.
-		/// \tparam T argument type (builtin floating-point type)
-		/// \param arg value to query
-		/// \retval true if infinity
-		/// \retval false else
+		
+		
+		
+		
+		
 		template<typename T> bool builtin_isinf(T arg)
 		{
 		#if HALF_ENABLE_CPP11_CMATH
@@ -554,11 +554,11 @@ namespace half_float
 		#endif
 		}
 
-		/// Check for NaN.
-		/// \tparam T argument type (builtin floating-point type)
-		/// \param arg value to query
-		/// \retval true if not a number
-		/// \retval false else
+		
+		
+		
+		
+		
 		template<typename T> bool builtin_isnan(T arg)
 		{
 		#if HALF_ENABLE_CPP11_CMATH
@@ -570,11 +570,11 @@ namespace half_float
 		#endif
 		}
 
-		/// Check sign.
-		/// \tparam T argument type (builtin floating-point type)
-		/// \param arg value to query
-		/// \retval true if signbit set
-		/// \retval false else
+		
+		
+		
+		
+		
 		template<typename T> bool builtin_signbit(T arg)
 		{
 		#if HALF_ENABLE_CPP11_CMATH
@@ -584,10 +584,10 @@ namespace half_float
 		#endif
 		}
 
-		/// Platform-independent sign mask.
-		/// \param arg integer value in two's complement
-		/// \retval -1 if \a arg negative
-		/// \retval 0 if \a arg positive
+		
+		
+		
+		
 		inline uint32 sign_mask(uint32 arg)
 		{
 			static const int N = std::numeric_limits<uint32>::digits - 1;
@@ -598,10 +598,10 @@ namespace half_float
 		#endif
 		}
 
-		/// Platform-independent arithmetic right shift.
-		/// \param arg integer value in two's complement
-		/// \param i shift amount (at most 31)
-		/// \return \a arg right shifted for \a i bits with possible sign extension
+		
+		
+		
+		
 		inline uint32 arithmetic_shift(uint32 arg, int i)
 		{
 		#if HALF_TWOS_COMPLEMENT_INT
@@ -611,17 +611,17 @@ namespace half_float
 		#endif
 		}
 
-		/// \}
-		/// \name Error handling
-		/// \{
+		
+		
+		
 
-		/// Internal exception flags.
-		/// \return reference to global exception flags
+		
+		
 		inline int& errflags() { HALF_THREAD_LOCAL int flags = 0; return flags; }
 
-		/// Raise floating-point exception.
-		/// \param flags exceptions to raise
-		/// \param cond condition to raise exceptions for
+		
+		
+		
 		inline void raise(int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond) = true)
 		{
 		#if HALF_ERRHANDLING
@@ -670,12 +670,12 @@ namespace half_float
 		#endif
 		}
 
-		/// Check and signal for any NaN.
-		/// \param x first half-precision value to check
-		/// \param y second half-precision value to check
-		/// \retval true if either \a x or \a y is NaN
-		/// \retval false else
-		/// \exception FE_INVALID if \a x or \a y is NaN
+		
+		
+		
+		
+		
+		
 		inline HALF_CONSTEXPR_NOERR bool compsignal(unsigned int x, unsigned int y)
 		{
 		#if HALF_ERRHANDLING
@@ -684,10 +684,10 @@ namespace half_float
 			return (x&0x7FFF) > 0x7C00 || (y&0x7FFF) > 0x7C00;
 		}
 
-		/// Signal and silence signaling NaN.
-		/// \param nan half-precision NaN value
-		/// \return quiet NaN
-		/// \exception FE_INVALID if \a nan is signaling NaN
+		
+		
+		
+		
 		inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int nan)
 		{
 		#if HALF_ERRHANDLING
@@ -696,11 +696,11 @@ namespace half_float
 			return nan | 0x200;
 		}
 
-		/// Signal and silence signaling NaNs.
-		/// \param x first half-precision value to check
-		/// \param y second half-precision value to check
-		/// \return quiet NaN
-		/// \exception FE_INVALID if \a x or \a y is signaling NaN
+		
+		
+		
+		
+		
 		inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y)
 		{
 		#if HALF_ERRHANDLING
@@ -709,12 +709,12 @@ namespace half_float
 			return ((x&0x7FFF)>0x7C00) ? (x|0x200) : (y|0x200);
 		}
 
-		/// Signal and silence signaling NaNs.
-		/// \param x first half-precision value to check
-		/// \param y second half-precision value to check
-		/// \param z third half-precision value to check
-		/// \return quiet NaN
-		/// \exception FE_INVALID if \a x, \a y or \a z is signaling NaN
+		
+		
+		
+		
+		
+		
 		inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y, unsigned int z)
 		{
 		#if HALF_ERRHANDLING
@@ -723,11 +723,11 @@ namespace half_float
 			return ((x&0x7FFF)>0x7C00) ? (x|0x200) : ((y&0x7FFF)>0x7C00) ? (y|0x200) : (z|0x200);
 		}
 
-		/// Select value or signaling NaN.
-		/// \param x preferred half-precision value
-		/// \param y ignored half-precision value except for signaling NaN
-		/// \return \a y if signaling NaN, \a x otherwise
-		/// \exception FE_INVALID if \a y is signaling NaN
+		
+		
+		
+		
+		
 		inline HALF_CONSTEXPR_NOERR unsigned int select(unsigned int x, unsigned int HALF_UNUSED_NOERR(y))
 		{
 		#if HALF_ERRHANDLING
@@ -737,9 +737,9 @@ namespace half_float
 		#endif
 		}
 
-		/// Raise domain error and return NaN.
-		/// return quiet NaN
-		/// \exception FE_INVALID
+		
+		
+		
 		inline HALF_CONSTEXPR_NOERR unsigned int invalid()
 		{
 		#if HALF_ERRHANDLING
@@ -748,10 +748,10 @@ namespace half_float
 			return 0x7FFF;
 		}
 
-		/// Raise pole error and return infinity.
-		/// \param sign half-precision value with sign bit only
-		/// \return half-precision infinity with sign of \a sign
-		/// \exception FE_DIVBYZERO
+		
+		
+		
+		
 		inline HALF_CONSTEXPR_NOERR unsigned int pole(unsigned int sign = 0)
 		{
 		#if HALF_ERRHANDLING
@@ -760,10 +760,10 @@ namespace half_float
 			return sign | 0x7C00;
 		}
 
-		/// Check value for underflow.
-		/// \param arg non-zero half-precision value to check
-		/// \return \a arg
-		/// \exception FE_UNDERFLOW if arg is subnormal
+		
+		
+		
+		
 		inline HALF_CONSTEXPR_NOERR unsigned int check_underflow(unsigned int arg)
 		{
 		#if HALF_ERRHANDLING && !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT
@@ -772,15 +772,15 @@ namespace half_float
 			return arg;
 		}
 
-		/// \}
-		/// \name Conversion and rounding
-		/// \{
+		
+		
+		
 
-		/// Half-precision overflow.
-		/// \tparam R rounding mode to use
-		/// \param sign half-precision value with sign bit only
-		/// \return rounded overflowing half-precision value
-		/// \exception FE_OVERFLOW
+		
+		
+		
+		
+		
 		template<std::float_round_style R> HALF_CONSTEXPR_NOERR unsigned int overflow(unsigned int sign = 0)
 		{
 		#if HALF_ERRHANDLING
@@ -792,11 +792,11 @@ namespace half_float
 					(sign|0x7C00);
 		}
 
-		/// Half-precision underflow.
-		/// \tparam R rounding mode to use
-		/// \param sign half-precision value with sign bit only
-		/// \return rounded underflowing half-precision value
-		/// \exception FE_UNDERFLOW
+		
+		
+		
+		
+		
 		template<std::float_round_style R> HALF_CONSTEXPR_NOERR unsigned int underflow(unsigned int sign = 0)
 		{
 		#if HALF_ERRHANDLING
@@ -807,16 +807,16 @@ namespace half_float
 					sign;
 		}
 
-		/// Round half-precision number.
-		/// \tparam R rounding mode to use
-		/// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results
-		/// \param value finite half-precision number to round
-		/// \param g guard bit (most significant discarded bit)
-		/// \param s sticky bit (or of all but the most significant discarded bits)
-		/// \return rounded half-precision value
-		/// \exception FE_OVERFLOW on overflows
-		/// \exception FE_UNDERFLOW on underflows
-		/// \exception FE_INEXACT if value had to be rounded or \a I is `true`
+		
+		
+		
+		
+		
+		
+		
+		
+		
+		
 		template<std::float_round_style R,bool I> HALF_CONSTEXPR_NOERR unsigned int rounded(unsigned int value, int g, int s)
 		{
 		#if HALF_ERRHANDLING
@@ -838,14 +838,14 @@ namespace half_float
 		#endif
 		}
 
-		/// Round half-precision number to nearest integer value.
-		/// \tparam R rounding mode to use
-		/// \tparam E `true` for round to even, `false` for round away from zero
-		/// \tparam I `true` to raise INEXACT exception (if inexact), `false` to never raise it
-		/// \param value half-precision value to round
-		/// \return half-precision bits for nearest integral value
-		/// \exception FE_INVALID for signaling NaN
-		/// \exception FE_INEXACT if value had to be rounded and \a I is `true`
+		
+		
+		
+		
+		
+		
+		
+		
 		template<std::float_round_style R,bool E,bool I> unsigned int integral(unsigned int value)
 		{
 			unsigned int abs = value & 0x7FFF;
@@ -867,20 +867,20 @@ namespace half_float
 						0) + value) & ~mask;
 		}
 
-		/// Convert fixed point to half-precision floating-point.
-		/// \tparam R rounding mode to use
-		/// \tparam F number of fractional bits in [11,31]
-		/// \tparam S `true` for signed, `false` for unsigned
-		/// \tparam N `true` for additional normalization step, `false` if already normalized to 1.F
-		/// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results
-		/// \param m mantissa in Q1.F fixed point format
-		/// \param exp biased exponent - 1
-		/// \param sign half-precision value with sign bit only
-		/// \param s sticky bit (or of all but the most significant already discarded bits)
-		/// \return value converted to half-precision
-		/// \exception FE_OVERFLOW on overflows
-		/// \exception FE_UNDERFLOW on underflows
-		/// \exception FE_INEXACT if value had to be rounded or \a I is `true`
+		
+		
+		
+		
+		
+		
+		
+		
+		
+		
+		
+		
+		
+		
 		template<std::float_round_style R,unsigned int F,bool S,bool N,bool I> unsigned int fixed2half(uint32 m, int exp = 14, unsigned int sign = 0, int s = 0)
 		{
 			if(S)
@@ -896,14 +896,14 @@ namespace half_float
 			return rounded<R,I>(sign+(exp<<10)+(m>>(F-10)), (m>>(F-11))&1, s|((m&((static_cast<uint32>(1)<<(F-11))-1))!=0));
 		}
 
-		/// Convert IEEE single-precision to half-precision.
-		/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf).
-		/// \tparam R rounding mode to use
-		/// \param value single-precision value to convert
-		/// \return rounded half-precision value
-		/// \exception FE_OVERFLOW on overflows
-		/// \exception FE_UNDERFLOW on underflows
-		/// \exception FE_INEXACT if value had to be rounded
+		
+		
+		
+		
+		
+		
+		
+		
 		template<std::float_round_style R> unsigned int float2half_impl(float value, true_type)
 		{
 		#if HALF_ENABLE_F16C_INTRINSICS
@@ -985,13 +985,13 @@ namespace half_float
 		#endif
 		}
 
-		/// Convert IEEE double-precision to half-precision.
-		/// \tparam R rounding mode to use
-		/// \param value double-precision value to convert
-		/// \return rounded half-precision value
-		/// \exception FE_OVERFLOW on overflows
-		/// \exception FE_UNDERFLOW on underflows
-		/// \exception FE_INEXACT if value had to be rounded
+		
+		
+		
+		
+		
+		
+		
 		template<std::float_round_style R> unsigned int float2half_impl(double value, true_type)
 		{
 		#if HALF_ENABLE_F16C_INTRINSICS
@@ -1020,14 +1020,14 @@ namespace half_float
 			return sign;
 		}
 
-		/// Convert non-IEEE floating-point to half-precision.
-		/// \tparam R rounding mode to use
-		/// \tparam T source type (builtin floating-point type)
-		/// \param value floating-point value to convert
-		/// \return rounded half-precision value
-		/// \exception FE_OVERFLOW on overflows
-		/// \exception FE_UNDERFLOW on underflows
-		/// \exception FE_INEXACT if value had to be rounded
+		
+		
+		
+		
+		
+		
+		
+		
 		template<std::float_round_style R,typename T> unsigned int float2half_impl(T value, ...)
 		{
 			unsigned int hbits = static_cast<unsigned>(builtin_signbit(value)) << 15;
@@ -1053,26 +1053,26 @@ namespace half_float
 			return rounded<R,false>(hbits+(m>>1), m&1, frac!=T());
 		}
 
-		/// Convert floating-point to half-precision.
-		/// \tparam R rounding mode to use
-		/// \tparam T source type (builtin floating-point type)
-		/// \param value floating-point value to convert
-		/// \return rounded half-precision value
-		/// \exception FE_OVERFLOW on overflows
-		/// \exception FE_UNDERFLOW on underflows
-		/// \exception FE_INEXACT if value had to be rounded
+		
+		
+		
+		
+		
+		
+		
+		
 		template<std::float_round_style R,typename T> unsigned int float2half(T value)
 		{
 			return float2half_impl<R>(value, bool_type<std::numeric_limits<T>::is_iec559&&sizeof(typename bits<T>::type)==sizeof(T)>());
 		}
 
-		/// Convert integer to half-precision floating-point.
-		/// \tparam R rounding mode to use
-		/// \tparam T type to convert (builtin integer type)
-		/// \param value integral value to convert
-		/// \return rounded half-precision value
-		/// \exception FE_OVERFLOW on overflows
-		/// \exception FE_INEXACT if value had to be rounded
+		
+		
+		
+		
+		
+		
+		
 		template<std::float_round_style R,typename T> unsigned int int2half(T value)
 		{
 			unsigned int bits = static_cast<unsigned>(value<0) << 15;
@@ -1089,10 +1089,10 @@ namespace half_float
 			return (exp>24) ? rounded<R,false>(bits, (value>>(exp-25))&1, (((1<<(exp-25))-1)&value)!=0) : bits;
 		}
 
-		/// Convert half-precision to IEEE single-precision.
-		/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf).
-		/// \param value half-precision value to convert
-		/// \return single-precision value
+		
+		
+		
+		
 		inline float half2float_impl(unsigned int value, float, true_type)
 		{
 		#if HALF_ENABLE_F16C_INTRINSICS
@@ -1253,9 +1253,9 @@ namespace half_float
 		#endif
 		}
 
-		/// Convert half-precision to IEEE double-precision.
-		/// \param value half-precision value to convert
-		/// \return double-precision value
+		
+		
+		
 		inline double half2float_impl(unsigned int value, double, true_type)
 		{
 		#if HALF_ENABLE_F16C_INTRINSICS
@@ -1276,10 +1276,10 @@ namespace half_float
 		#endif
 		}
 
-		/// Convert half-precision to non-IEEE floating-point.
-		/// \tparam T type to convert to (builtin integer type)
-		/// \param value half-precision value to convert
-		/// \return floating-point value
+		
+		
+		
+		
 		template<typename T> T half2float_impl(unsigned int value, T, ...)
 		{
 			T out;
@@ -1296,24 +1296,24 @@ namespace half_float
 			return (value&0x8000) ? -out : out;
 		}
 
-		/// Convert half-precision to floating-point.
-		/// \tparam T type to convert to (builtin integer type)
-		/// \param value half-precision value to convert
-		/// \return floating-point value
+		
+		
+		
+		
 		template<typename T> T half2float(unsigned int value)
 		{
 			return half2float_impl(value, T(), bool_type<std::numeric_limits<T>::is_iec559&&sizeof(typename bits<T>::type)==sizeof(T)>());
 		}
 
-		/// Convert half-precision floating-point to integer.
-		/// \tparam R rounding mode to use
-		/// \tparam E `true` for round to even, `false` for round away from zero
-		/// \tparam I `true` to raise INEXACT exception (if inexact), `false` to never raise it
-		/// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits)
-		/// \param value half-precision value to convert
-		/// \return rounded integer value
-		/// \exception FE_INVALID if value is not representable in type \a T
-		/// \exception FE_INEXACT if value had to be rounded and \a I is `true`
+		
+		
+		
+		
+		
+		
+		
+		
+		
 		template<std::float_round_style R,bool E,bool I,typename T> T half2int(unsigned int value)
 		{
 			unsigned int abs = value & 0x7FFF;
@@ -1343,15 +1343,15 @@ namespace half_float
 			return static_cast<T>((value&0x8000) ? -i : i);
 		}
 
-		/// \}
-		/// \name Mathematics
-		/// \{
+		
+		
+		
 
-		/// upper part of 64-bit multiplication.
-		/// \tparam R rounding mode to use
-		/// \param x first factor
-		/// \param y second factor
-		/// \return upper 32 bit of \a x * \a y
+		
+		
+		
+		
+		
 		template<std::float_round_style R> uint32 mulhi(uint32 x, uint32 y)
 		{
 			uint32 xy = (x>>16) * (y&0xFFFF), yx = (x&0xFFFF) * (y>>16), c = (xy&0xFFFF) + (yx&0xFFFF) + (((x&0xFFFF)*(y&0xFFFF))>>16);
@@ -1359,10 +1359,10 @@ namespace half_float
 				((R==std::round_to_nearest) ? ((c>>15)&1) : (R==std::round_toward_infinity) ? ((c&0xFFFF)!=0) : 0);
 		}
 
-		/// 64-bit multiplication.
-		/// \param x first factor
-		/// \param y second factor
-		/// \return upper 32 bit of \a x * \a y rounded to nearest
+		
+		
+		
+		
 		inline uint32 multiply64(uint32 x, uint32 y)
 		{
 		#if HALF_ENABLE_CPP11_LONG_LONG
@@ -1372,11 +1372,11 @@ namespace half_float
 		#endif
 		}
 
-		/// 64-bit division.
-		/// \param x upper 32 bit of dividend
-		/// \param y divisor
-		/// \param s variable to store sticky bit for rounding
-		/// \return (\a x << 32) / \a y
+		
+		
+		
+		
+		
 		inline uint32 divide64(uint32 x, uint32 y, int &s)
 		{
 		#if HALF_ENABLE_CPP11_LONG_LONG
@@ -1399,13 +1399,13 @@ namespace half_float
 		#endif
 		}
 
-		/// Half precision positive modulus.
-		/// \tparam Q `true` to compute full quotient, `false` else
-		/// \tparam R `true` to compute signed remainder, `false` for positive remainder
-		/// \param x first operand as positive finite half-precision value
-		/// \param y second operand as positive finite half-precision value
-		/// \param quo adress to store quotient at, `nullptr` if \a Q `false`
-		/// \return modulus of \a x / \a y
+		
+		
+		
+		
+		
+		
+		
 		template<bool Q,bool R> unsigned int mod(unsigned int x, unsigned int y, int *quo = NULL)
 		{
 			unsigned int q = 0;
@@ -1472,11 +1472,11 @@ namespace half_float
 			return x;
 		}
 
-		/// Fixed point square root.
-		/// \tparam F number of fractional bits
-		/// \param r radicand in Q1.F fixed point format
-		/// \param exp exponent
-		/// \return square root as Q1.F/2
+		
+		
+		
+		
+		
 		template<unsigned int F> uint32 sqrt(uint32 &r, int &exp)
 		{
 			int i = exp & 1;
@@ -1496,11 +1496,11 @@ namespace half_float
 			return m;
 		}
 
-		/// Fixed point binary exponential.
-		/// This uses the BKM algorithm in E-mode.
-		/// \param m exponent in [0,1) as Q0.31
-		/// \param n number of iterations (at most 32)
-		/// \return 2 ^ \a m as Q1.31
+		
+		
+		
+		
+		
 		inline uint32 exp2(uint32 m, unsigned int n = 32)
 		{
 			static const uint32 logs[] = {
@@ -1523,11 +1523,11 @@ namespace half_float
 			return mx;
 		}
 
-		/// Fixed point binary logarithm.
-		/// This uses the BKM algorithm in L-mode.
-		/// \param m mantissa in [1,2) as Q1.30
-		/// \param n number of iterations (at most 32)
-		/// \return log2(\a m) as Q0.31
+		
+		
+		
+		
+		
 		inline uint32 log2(uint32 m, unsigned int n = 32)
 		{
 			static const uint32 logs[] = {
@@ -1550,11 +1550,11 @@ namespace half_float
 			return my;
 		}
 
-		/// Fixed point sine and cosine.
-		/// This uses the CORDIC algorithm in rotation mode.
-		/// \param mz angle in [-pi/2,pi/2] as Q1.30
-		/// \param n number of iterations (at most 31)
-		/// \return sine and cosine of \a mz as Q1.30
+		
+		
+		
+		
+		
 		inline std::pair<uint32,uint32> sincos(uint32 mz, unsigned int n = 31)
 		{
 			static const uint32 angles[] = {
@@ -1573,12 +1573,12 @@ namespace half_float
 			return std::make_pair(my, mx);
 		}
 
-		/// Fixed point arc tangent.
-		/// This uses the CORDIC algorithm in vectoring mode.
-		/// \param my y coordinate as Q0.30
-		/// \param mx x coordinate as Q0.30
-		/// \param n number of iterations (at most 31)
-		/// \return arc tangent of \a my / \a mx as Q1.30
+		
+		
+		
+		
+		
+		
 		inline uint32 atan2(uint32 my, uint32 mx, unsigned int n = 31)
 		{
 			static const uint32 angles[] = {
@@ -1597,10 +1597,10 @@ namespace half_float
 			return mz;
 		}
 
-		/// Reduce argument for trigonometric functions.
-		/// \param abs half-precision floating-point value
-		/// \param k value to take quarter period
-		/// \return \a abs reduced to [-pi/4,pi/4] as Q0.30
+		
+		
+		
+		
 		inline uint32 angle_arg(unsigned int abs, int &k)
 		{
 			uint32 m = (abs&0x3FF) | ((abs>0x3FF)<<10);
@@ -1621,9 +1621,9 @@ namespace half_float
 		#endif
 		}
 
-		/// Get arguments for atan2 function.
-		/// \param abs half-precision floating-point value
-		/// \return \a abs and sqrt(1 - \a abs^2) as Q0.30
+		
+		
+		
 		inline std::pair<uint32,uint32> atan2_args(unsigned int abs)
 		{
 			int exp = -15;
@@ -1642,11 +1642,11 @@ namespace half_float
 			return std::make_pair(my<<13, (mx<<13)+(r<<12)/mx);
 		}
 
-		/// Get exponentials for hyperbolic computation
-		/// \param abs half-precision floating-point value
-		/// \param exp variable to take unbiased exponent of larger result
-		/// \param n number of BKM iterations (at most 32)
-		/// \return exp(abs) and exp(-\a abs) as Q1.31 with same exponent
+		
+		
+		
+		
+		
 		inline std::pair<uint32,uint32> hyperbolic_args(unsigned int abs, int &exp, unsigned int n = 32)
 		{
 			uint32 mx = detail::multiply64(static_cast<uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29), my;
@@ -1674,17 +1674,17 @@ namespace half_float
 			return std::make_pair(mx, (d<31) ? ((my>>d)|((my&((static_cast<uint32>(1)<<d)-1))!=0)) : 1);
 		}
 
-		/// Postprocessing for binary exponential.
-		/// \tparam R rounding mode to use
-		/// \param m fractional part of as Q0.31
-		/// \param exp absolute value of unbiased exponent
-		/// \param esign sign of actual exponent
-		/// \param sign sign bit of result
-		/// \param n number of BKM iterations (at most 32)
-		/// \return value converted to half-precision
-		/// \exception FE_OVERFLOW on overflows
-		/// \exception FE_UNDERFLOW on underflows
-		/// \exception FE_INEXACT if value had to be rounded or \a I is `true`
+		
+		
+		
+		
+		
+		
+		
+		
+		
+		
+		
 		template<std::float_round_style R> unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign = 0, unsigned int n = 32)
 		{
 			if(esign)
@@ -1706,17 +1706,17 @@ namespace half_float
 			return fixed2half<R,31,false,false,true>(m, exp+14, sign, s);
 		}
 
-		/// Postprocessing for binary logarithm.
-		/// \tparam R rounding mode to use
-		/// \tparam L logarithm for base transformation as Q1.31
-		/// \param m fractional part of logarithm as Q0.31
-		/// \param ilog signed integer part of logarithm
-		/// \param exp biased exponent of result
-		/// \param sign sign bit of result
-		/// \return value base-transformed and converted to half-precision
-		/// \exception FE_OVERFLOW on overflows
-		/// \exception FE_UNDERFLOW on underflows
-		/// \exception FE_INEXACT if no other exception occurred
+		
+		
+		
+		
+		
+		
+		
+		
+		
+		
+		
 		template<std::float_round_style R,uint32 L> unsigned int log2_post(uint32 m, int ilog, int exp, unsigned int sign = 0)
 		{
 			uint32 msign = sign_mask(ilog);
@@ -1734,14 +1734,14 @@ namespace half_float
 			return fixed2half<R,30,false,false,true>(m, exp, sign, 1);
 		}
 
-		/// Hypotenuse square root and postprocessing.
-		/// \tparam R rounding mode to use
-		/// \param r mantissa as Q2.30
-		/// \param exp biased exponent
-		/// \return square root converted to half-precision
-		/// \exception FE_OVERFLOW on overflows
-		/// \exception FE_UNDERFLOW on underflows
-		/// \exception FE_INEXACT if value had to be rounded
+		
+		
+		
+		
+		
+		
+		
+		
 		template<std::float_round_style R> unsigned int hypot_post(uint32 r, int exp)
 		{
 			int i = r >> 31;
@@ -1754,16 +1754,16 @@ namespace half_float
 			return fixed2half<R,15,false,false,false>(m, exp-1, 0, r!=0);
 		}
 
-		/// Division and postprocessing for tangents.
-		/// \tparam R rounding mode to use
-		/// \param my dividend as Q1.31
-		/// \param mx divisor as Q1.31
-		/// \param exp biased exponent of result
-		/// \param sign sign bit of result
-		/// \return quotient converted to half-precision
-		/// \exception FE_OVERFLOW on overflows
-		/// \exception FE_UNDERFLOW on underflows
-		/// \exception FE_INEXACT if no other exception occurred
+		
+		
+		
+		
+		
+		
+		
+		
+		
+		
 		template<std::float_round_style R> unsigned int tangent_post(uint32 my, uint32 mx, int exp, unsigned int sign = 0)
 		{
 			int i = my >= mx, s;
@@ -1776,15 +1776,15 @@ namespace half_float
 			return fixed2half<R,30,false,false,true>(m, exp, sign, s);
 		}
 
-		/// Area function and postprocessing.
-		/// This computes the value directly in Q2.30 using the representation `asinh|acosh(x) = log(x+sqrt(x^2+|-1))`.
-		/// \tparam R rounding mode to use
-		/// \tparam S `true` for asinh, `false` for acosh
-		/// \param arg half-precision argument
-		/// \return asinh|acosh(\a arg) converted to half-precision
-		/// \exception FE_OVERFLOW on overflows
-		/// \exception FE_UNDERFLOW on underflows
-		/// \exception FE_INEXACT if no other exception occurred
+		
+		
+		
+		
+		
+		
+		
+		
+		
 		template<std::float_round_style R,bool S> unsigned int area(unsigned int arg)
 		{
 			int abs = arg & 0x7FFF, expx = (abs>>10) + (abs<=0x3FF) - 15, expy = -15, ilog, i;
@@ -1834,16 +1834,16 @@ namespace half_float
 			return log2_post<R,0xB8AA3B2A>(log2(my>>i, 26+S+G)+(G<<3), ilog+i, 17, arg&(static_cast<unsigned>(S)<<15));
 		}
 
-		/// Class for 1.31 unsigned floating-point computation
+		
 		struct f31
 		{
-			/// Constructor.
-			/// \param mant mantissa as 1.31
-			/// \param e exponent
+			
+			
+			
 			HALF_CONSTEXPR f31(uint32 mant, int e) : m(mant), exp(e) {}
 
-			/// Constructor.
-			/// \param abs unsigned half-precision value
+			
+			
 			f31(unsigned int abs) : exp(-15)
 			{
 				for(; abs<0x400; abs<<=1,--exp) ;
@@ -1851,10 +1851,10 @@ namespace half_float
 				exp += (abs>>10);
 			}
 
-			/// Addition operator.
-			/// \param a first operand
-			/// \param b second operand
-			/// \return \a a + \a b
+			
+			
+			
+			
 			friend f31 operator+(f31 a, f31 b)
 			{
 				if(b.exp > a.exp)
@@ -1865,10 +1865,10 @@ namespace half_float
 				return f31(((m+i)>>i)|0x80000000, a.exp+i);
 			}
 
-			/// Subtraction operator.
-			/// \param a first operand
-			/// \param b second operand
-			/// \return \a a - \a b
+			
+			
+			
+			
 			friend f31 operator-(f31 a, f31 b)
 			{
 				int d = a.exp - b.exp, exp = a.exp;
@@ -1879,10 +1879,10 @@ namespace half_float
 				return f31(m, exp);
 			}
 
-			/// Multiplication operator.
-			/// \param a first operand
-			/// \param b second operand
-			/// \return \a a * \a b
+			
+			
+			
+			
 			friend f31 operator*(f31 a, f31 b)
 			{
 				uint32 m = multiply64(a.m, b.m);
@@ -1890,10 +1890,10 @@ namespace half_float
 				return f31(m<<(1-i), a.exp + b.exp + i);
 			}
 
-			/// Division operator.
-			/// \param a first operand
-			/// \param b second operand
-			/// \return \a a / \a b
+			
+			
+			
+			
 			friend f31 operator/(f31 a, f31 b)
 			{
 				int i = a.m >= b.m, s;
@@ -1901,20 +1901,20 @@ namespace half_float
 				return f31(m, a.exp - b.exp + i - 1);
 			}
 
-			uint32 m;			///< mantissa as 1.31.
-			int exp;			///< exponent.
+			uint32 m;			
+			int exp;			
 		};
 
-		/// Error function and postprocessing.
-		/// This computes the value directly in Q1.31 using the approximations given
-		/// [here](https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions).
-		/// \tparam R rounding mode to use
-		/// \tparam C `true` for comlementary error function, `false` else
-		/// \param arg half-precision function argument
-		/// \return approximated value of error function in half-precision
-		/// \exception FE_OVERFLOW on overflows
-		/// \exception FE_UNDERFLOW on underflows
-		/// \exception FE_INEXACT if no other exception occurred
+		
+		
+		
+		
+		
+		
+		
+		
+		
+		
 		template<std::float_round_style R,bool C> unsigned int erf(unsigned int arg)
 		{
 			unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
@@ -1925,15 +1925,15 @@ namespace half_float
 					(e.exp<-25) ? underflow<R>() : fixed2half<R,30,false,false,true>(e.m>>1, e.exp+14, 0, e.m&1);
 		}
 
-		/// Gamma function and postprocessing.
-		/// This approximates the value of either the gamma function or its logarithm directly in Q1.31.
-		/// \tparam R rounding mode to use
-		/// \tparam L `true` for lograithm of gamma function, `false` for gamma function
-		/// \param arg half-precision floating-point value
-		/// \return lgamma/tgamma(\a arg) in half-precision
-		/// \exception FE_OVERFLOW on overflows
-		/// \exception FE_UNDERFLOW on underflows
-		/// \exception FE_INEXACT if \a arg is not a positive integer
+		
+		
+		
+		
+		
+		
+		
+		
+		
 		template<std::float_round_style R,bool L> unsigned int gamma(unsigned int arg)
 		{
 /*			static const double p[] ={ 2.50662827563479526904, 225.525584619175212544, -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837, 0.0114684895434781459556 };
@@ -2030,144 +2030,144 @@ namespace half_float
 			}
 			return fixed2half<R,31,false,false,true>(s.m, s.exp+14, sign);
 		}
-		/// \}
+		
 
 		template<typename,typename,std::float_round_style> struct half_caster;
 	}
 
-	/// Half-precision floating-point type.
-	/// This class implements an IEEE-conformant half-precision floating-point type with the usual arithmetic
-	/// operators and conversions. It is implicitly convertible to single-precision floating-point, which makes artihmetic
-	/// expressions and functions with mixed-type operands to be of the most precise operand type.
-	///
-	/// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and
-	/// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which
-	/// means it can be standard-conformantly copied using raw binary copies. But in this context some more words about the
-	/// actual size of the type. Although the half is representing an IEEE 16-bit type, it does not neccessarily have to be of
-	/// exactly 16-bits size. But on any reasonable implementation the actual binary representation of this type will most
-	/// probably not ivolve any additional "magic" or padding beyond the simple binary representation of the underlying 16-bit
-	/// IEEE number, even if not strictly guaranteed by the standard. But even then it only has an actual size of 16 bits if
-	/// your C++ implementation supports an unsigned integer type of exactly 16 bits width. But this should be the case on
-	/// nearly any reasonable platform.
-	///
-	/// So if your C++ implementation is not totally exotic or imposes special alignment requirements, it is a reasonable
-	/// assumption that the data of a half is just comprised of the 2 bytes of the underlying IEEE representation.
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	class half
 	{
 	public:
-		/// \name Construction and assignment
-		/// \{
+		
+		
 
-		/// Default constructor.
-		/// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics
-		/// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics.
+		
+		
+		
 		HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {}
 
-		/// Conversion constructor.
-		/// \param rhs float to convert
-		/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+		
+		
+		
 		explicit half(float rhs) : data_(static_cast<detail::uint16>(detail::float2half<round_style>(rhs))) {}
 	
-		/// Conversion to single-precision.
-		/// \return single precision value representing expression value
+		
+		
 		operator float() const { return detail::half2float<float>(data_); }
 
-		/// Assignment operator.
-		/// \param rhs single-precision value to copy from
-		/// \return reference to this half
-		/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+		
+		
+		
+		
 		half& operator=(float rhs) { data_ = static_cast<detail::uint16>(detail::float2half<round_style>(rhs)); return *this; }
 
-		/// \}
-		/// \name Arithmetic updates
-		/// \{
+		
+		
+		
 
-		/// Arithmetic assignment.
-		/// \tparam T type of concrete half expression
-		/// \param rhs half expression to add
-		/// \return reference to this half
-		/// \exception FE_... according to operator+(half,half)
+		
+		
+		
+		
+		
 		half& operator+=(half rhs) { return *this = *this + rhs; }
 
-		/// Arithmetic assignment.
-		/// \tparam T type of concrete half expression
-		/// \param rhs half expression to subtract
-		/// \return reference to this half
-		/// \exception FE_... according to operator-(half,half)
+		
+		
+		
+		
+		
 		half& operator-=(half rhs) { return *this = *this - rhs; }
 
-		/// Arithmetic assignment.
-		/// \tparam T type of concrete half expression
-		/// \param rhs half expression to multiply with
-		/// \return reference to this half
-		/// \exception FE_... according to operator*(half,half)
+		
+		
+		
+		
+		
 		half& operator*=(half rhs) { return *this = *this * rhs; }
 
-		/// Arithmetic assignment.
-		/// \tparam T type of concrete half expression
-		/// \param rhs half expression to divide by
-		/// \return reference to this half
-		/// \exception FE_... according to operator/(half,half)
+		
+		
+		
+		
+		
 		half& operator/=(half rhs) { return *this = *this / rhs; }
 
-		/// Arithmetic assignment.
-		/// \param rhs single-precision value to add
-		/// \return reference to this half
-		/// \exception FE_... according to operator=()
+		
+		
+		
+		
 		half& operator+=(float rhs) { return *this = *this + rhs; }
 
-		/// Arithmetic assignment.
-		/// \param rhs single-precision value to subtract
-		/// \return reference to this half
-		/// \exception FE_... according to operator=()
+		
+		
+		
+		
 		half& operator-=(float rhs) { return *this = *this - rhs; }
 
-		/// Arithmetic assignment.
-		/// \param rhs single-precision value to multiply with
-		/// \return reference to this half
-		/// \exception FE_... according to operator=()
+		
+		
+		
+		
 		half& operator*=(float rhs) { return *this = *this * rhs; }
 
-		/// Arithmetic assignment.
-		/// \param rhs single-precision value to divide by
-		/// \return reference to this half
-		/// \exception FE_... according to operator=()
+		
+		
+		
+		
 		half& operator/=(float rhs) { return *this = *this / rhs; }
 
-		/// \}
-		/// \name Increment and decrement
-		/// \{
+		
+		
+		
 
-		/// Prefix increment.
-		/// \return incremented half value
-		/// \exception FE_... according to operator+(half,half)
+		
+		
+		
 		half& operator++() { return *this = *this + half(detail::binary, 0x3C00); }
 
-		/// Prefix decrement.
-		/// \return decremented half value
-		/// \exception FE_... according to operator-(half,half)
+		
+		
+		
 		half& operator--() { return *this = *this + half(detail::binary, 0xBC00); }
 
-		/// Postfix increment.
-		/// \return non-incremented half value
-		/// \exception FE_... according to operator+(half,half)
+		
+		
+		
 		half operator++(int) { half out(*this); ++*this; return out; }
 
-		/// Postfix decrement.
-		/// \return non-decremented half value
-		/// \exception FE_... according to operator-(half,half)
+		
+		
+		
 		half operator--(int) { half out(*this); --*this; return out; }
-		/// \}
+		
 	
 	private:
-		/// Rounding mode to use
+		
 		static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE);
 
-		/// Constructor.
-		/// \param bits binary representation to set half to
+		
+		
 		HALF_CONSTEXPR half(detail::binary_t, unsigned int bits) HALF_NOEXCEPT : data_(static_cast<detail::uint16>(bits)) {}
 
-		/// Internal binary representation
+		
 		detail::uint16 data_;
 
 	#ifndef HALF_DOXYGEN_ONLY
@@ -2269,25 +2269,25 @@ namespace half_float
 #if HALF_ENABLE_CPP11_USER_LITERALS
 	namespace literal
 	{
-		/// Half literal.
-		/// While this returns a properly rounded half-precision value, half literals can unfortunately not be constant
-		/// expressions due to rather involved conversions. So don't expect this to be a literal literal without involving
-		/// conversion operations at runtime. It is a convenience feature, not a performance optimization.
-		/// \param value literal value
-		/// \return half with of given value (possibly rounded)
-		/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+		
+		
+		
+		
+		
+		
+		
 		inline half operator "" _h(long double value) { return half(detail::binary, detail::float2half<half::round_style>(value)); }
 	}
 #endif
 
 	namespace detail
 	{
-		/// Helper class for half casts.
-		/// This class template has to be specialized for all valid cast arguments to define an appropriate static
-		/// `cast` member function and a corresponding `type` member denoting its return type.
-		/// \tparam T destination type
-		/// \tparam U source type
-		/// \tparam R rounding mode to use
+		
+		
+		
+		
+		
+		
 		template<typename T,typename U,std::float_round_style R=(std::float_round_style)(HALF_ROUND_STYLE)> struct half_caster {};
 		template<typename U,std::float_round_style R> struct half_caster<half,U,R>
 		{
@@ -2320,132 +2320,132 @@ namespace half_float
 	}
 }
 
-/// Extensions to the C++ standard library.
+
 namespace std
 {
-	/// Numeric limits for half-precision floats.
-	/// **See also:** Documentation for [std::numeric_limits](https://en.cppreference.com/w/cpp/types/numeric_limits)
+	
+	
 	template<> class numeric_limits<half_float::half>
 	{
 	public:
-		/// Is template specialization.
+		
 		static HALF_CONSTEXPR_CONST bool is_specialized = true;
 
-		/// Supports signed values.
+		
 		static HALF_CONSTEXPR_CONST bool is_signed = true;
 
-		/// Is not an integer type.
+		
 		static HALF_CONSTEXPR_CONST bool is_integer = false;
 
-		/// Is not exact.
+		
 		static HALF_CONSTEXPR_CONST bool is_exact = false;
 
-		/// Doesn't provide modulo arithmetic.
+		
 		static HALF_CONSTEXPR_CONST bool is_modulo = false;
 
-		/// Has a finite set of values.
+		
 		static HALF_CONSTEXPR_CONST bool is_bounded = true;
 
-		/// IEEE conformant.
+		
 		static HALF_CONSTEXPR_CONST bool is_iec559 = true;
 
-		/// Supports infinity.
+		
 		static HALF_CONSTEXPR_CONST bool has_infinity = true;
 
-		/// Supports quiet NaNs.
+		
 		static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true;
 
-		/// Supports signaling NaNs.
+		
 		static HALF_CONSTEXPR_CONST bool has_signaling_NaN = true;
 
-		/// Supports subnormal values.
+		
 		static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present;
 
-		/// Supports no denormalization detection.
+		
 		static HALF_CONSTEXPR_CONST bool has_denorm_loss = false;
 
 	#if HALF_ERRHANDLING_THROWS
 		static HALF_CONSTEXPR_CONST bool traps = true;
 	#else
-		/// Traps only if [HALF_ERRHANDLING_THROW_...](\ref HALF_ERRHANDLING_THROW_INVALID) is acitvated.
+		
 		static HALF_CONSTEXPR_CONST bool traps = false;
 	#endif
 
-		/// Does not support no pre-rounding underflow detection.
+		
 		static HALF_CONSTEXPR_CONST bool tinyness_before = false;
 
-		/// Rounding mode.
+		
 		static HALF_CONSTEXPR_CONST float_round_style round_style = half_float::half::round_style;
 
-		/// Significant digits.
+		
 		static HALF_CONSTEXPR_CONST int digits = 11;
 
-		/// Significant decimal digits.
+		
 		static HALF_CONSTEXPR_CONST int digits10 = 3;
 
-		/// Required decimal digits to represent all possible values.
+		
 		static HALF_CONSTEXPR_CONST int max_digits10 = 5;
 
-		/// Number base.
+		
 		static HALF_CONSTEXPR_CONST int radix = 2;
 
-		/// One more than smallest exponent.
+		
 		static HALF_CONSTEXPR_CONST int min_exponent = -13;
 
-		/// Smallest normalized representable power of 10.
+		
 		static HALF_CONSTEXPR_CONST int min_exponent10 = -4;
 
-		/// One more than largest exponent
+		
 		static HALF_CONSTEXPR_CONST int max_exponent = 16;
 
-		/// Largest finitely representable power of 10.
+		
 		static HALF_CONSTEXPR_CONST int max_exponent10 = 4;
 
-		/// Smallest positive normal value.
+		
 		static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0400); }
 
-		/// Smallest finite value.
+		
 		static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0xFBFF); }
 
-		/// Largest finite value.
+		
 		static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7BFF); }
 
-		/// Difference between 1 and next representable value.
+		
 		static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x1400); }
 
-		/// Maximum rounding error in ULP (units in the last place).
+		
 		static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW
 			{ return half_float::half(half_float::detail::binary, (round_style==std::round_to_nearest) ? 0x3800 : 0x3C00); }
 
-		/// Positive infinity.
+		
 		static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7C00); }
 
-		/// Quiet NaN.
+		
 		static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7FFF); }
 
-		/// Signaling NaN.
+		
 		static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7DFF); }
 
-		/// Smallest positive subnormal value.
+		
 		static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0001); }
 	};
 
 #if HALF_ENABLE_CPP11_HASH
-	/// Hash function for half-precision floats.
-	/// This is only defined if C++11 `std::hash` is supported and enabled.
-	///
-	/// **See also:** Documentation for [std::hash](https://en.cppreference.com/w/cpp/utility/hash)
+	
+	
+	
+	
 	template<> struct hash<half_float::half>
 	{
-		/// Type of function argument.
+		
 		typedef half_float::half argument_type;
 
-		/// Function return type.
+		
 		typedef size_t result_type;
 
-		/// Compute hash function.
-		/// \param arg half to hash
-		/// \return hash value
+		
+		
+		
 		result_type operator()(argument_type arg) const { return hash<half_float::detail::uint16>()(arg.data_&-static_cast<unsigned>(arg.data_!=0x8000)); }
 	};
 #endif
@@ -2453,102 +2453,102 @@ namespace std
 
 namespace half_float
 {
-	/// \anchor compop
-	/// \name Comparison operators
-	/// \{
-
-	/// Comparison for equality.
-	/// \param x first operand
-	/// \param y second operand
-	/// \retval true if operands equal
-	/// \retval false else
-	/// \exception FE_INVALID if \a x or \a y is NaN
+	
+	
+	
+
+	
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR_NOERR bool operator==(half x, half y)
 	{
 		return !detail::compsignal(x.data_, y.data_) && (x.data_==y.data_ || !((x.data_|y.data_)&0x7FFF));
 	}
 
-	/// Comparison for inequality.
-	/// \param x first operand
-	/// \param y second operand
-	/// \retval true if operands not equal
-	/// \retval false else
-	/// \exception FE_INVALID if \a x or \a y is NaN
+	
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR_NOERR bool operator!=(half x, half y)
 	{
 		return detail::compsignal(x.data_, y.data_) || (x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF));
 	}
 
-	/// Comparison for less than.
-	/// \param x first operand
-	/// \param y second operand
-	/// \retval true if \a x less than \a y
-	/// \retval false else
-	/// \exception FE_INVALID if \a x or \a y is NaN
+	
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR_NOERR bool operator<(half x, half y)
 	{
 		return !detail::compsignal(x.data_, y.data_) &&
 			((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
 	}
 
-	/// Comparison for greater than.
-	/// \param x first operand
-	/// \param y second operand
-	/// \retval true if \a x greater than \a y
-	/// \retval false else
-	/// \exception FE_INVALID if \a x or \a y is NaN
+	
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR_NOERR bool operator>(half x, half y)
 	{
 		return !detail::compsignal(x.data_, y.data_) &&
 			((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
 	}
 
-	/// Comparison for less equal.
-	/// \param x first operand
-	/// \param y second operand
-	/// \retval true if \a x less equal \a y
-	/// \retval false else
-	/// \exception FE_INVALID if \a x or \a y is NaN
+	
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR_NOERR bool operator<=(half x, half y)
 	{
 		return !detail::compsignal(x.data_, y.data_) &&
 			((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
 	}
 
-	/// Comparison for greater equal.
-	/// \param x first operand
-	/// \param y second operand
-	/// \retval true if \a x greater equal \a y
-	/// \retval false else
-	/// \exception FE_INVALID if \a x or \a y is NaN
+	
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR_NOERR bool operator>=(half x, half y)
 	{
 		return !detail::compsignal(x.data_, y.data_) &&
 			((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
 	}
 
-	/// \}
-	/// \anchor arithmetics
-	/// \name Arithmetic operators
-	/// \{
+	
+	
+	
+	
 
-	/// Identity.
-	/// \param arg operand
-	/// \return unchanged operand
+	
+	
+	
 	inline HALF_CONSTEXPR half operator+(half arg) { return arg; }
 
-	/// Negation.
-	/// \param arg operand
-	/// \return negated operand
+	
+	
+	
 	inline HALF_CONSTEXPR half operator-(half arg) { return half(detail::binary, arg.data_^0x8000); }
 
-	/// Addition.
-	/// This operation is exact to rounding for all rounding modes.
-	/// \param x left operand
-	/// \param y right operand
-	/// \return sum of half expressions
-	/// \exception FE_INVALID if \a x and \a y are infinities with different signs or signaling NaNs
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
 	inline half operator+(half x, half y)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -2592,13 +2592,13 @@ namespace half_float
 	#endif
 	}
 
-	/// Subtraction.
-	/// This operation is exact to rounding for all rounding modes.
-	/// \param x left operand
-	/// \param y right operand
-	/// \return difference of half expressions
-	/// \exception FE_INVALID if \a x and \a y are infinities with equal signs or signaling NaNs
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
 	inline half operator-(half x, half y)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -2608,13 +2608,13 @@ namespace half_float
 	#endif
 	}
 
-	/// Multiplication.
-	/// This operation is exact to rounding for all rounding modes.
-	/// \param x left operand
-	/// \param y right operand
-	/// \return product of half expressions
-	/// \exception FE_INVALID if multiplying 0 with infinity or if \a x or \a y is signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
 	inline half operator*(half x, half y)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -2640,14 +2640,14 @@ namespace half_float
 	#endif
 	}
 
-	/// Division.
-	/// This operation is exact to rounding for all rounding modes.
-	/// \param x left operand
-	/// \param y right operand
-	/// \return quotient of half expressions
-	/// \exception FE_INVALID if dividing 0s or infinities with each other or if \a x or \a y is signaling NaN
-	/// \exception FE_DIVBYZERO if dividing finite value by 0
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half operator/(half x, half y)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -2677,16 +2677,16 @@ namespace half_float
 	#endif
 	}
 
-	/// \}
-	/// \anchor streaming
-	/// \name Input and output
-	/// \{
+	
+	
+	
+	
 
-	/// Output operator.
-	///	This uses the built-in functionality for streaming out floating-point numbers.
-	/// \param out output stream to write into
-	/// \param arg half expression to write
-	/// \return reference to output stream
+	
+	
+	
+	
+	
 	template<typename charT,typename traits> std::basic_ostream<charT,traits>& operator<<(std::basic_ostream<charT,traits> &out, half arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -2696,15 +2696,15 @@ namespace half_float
 	#endif
 	}
 
-	/// Input operator.
-	///	This uses the built-in functionality for streaming in floating-point numbers, specifically double precision floating
-	/// point numbers (unless overridden with [HALF_ARITHMETIC_TYPE](\ref HALF_ARITHMETIC_TYPE)). So the input string is first
-	/// rounded to double precision using the underlying platform's current floating-point rounding mode before being rounded
-	/// to half-precision using the library's half-precision rounding mode.
-	/// \param in input stream to read from
-	/// \param arg half to read into
-	/// \return reference to input stream
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	template<typename charT,typename traits> std::basic_istream<charT,traits>& operator>>(std::basic_istream<charT,traits> &in, half &arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -2717,29 +2717,29 @@ namespace half_float
 		return in;
 	}
 
-	/// \}
-	/// \anchor basic
-	/// \name Basic mathematical operations
-	/// \{
+	
+	
+	
+	
 
-	/// Absolute value.
-	/// **See also:** Documentation for [std::fabs](https://en.cppreference.com/w/cpp/numeric/math/fabs).
-	/// \param arg operand
-	/// \return absolute value of \a arg
+	
+	
+	
+	
 	inline HALF_CONSTEXPR half fabs(half arg) { return half(detail::binary, arg.data_&0x7FFF); }
 
-	/// Absolute value.
-	/// **See also:** Documentation for [std::abs](https://en.cppreference.com/w/cpp/numeric/math/fabs).
-	/// \param arg operand
-	/// \return absolute value of \a arg
+	
+	
+	
+	
 	inline HALF_CONSTEXPR half abs(half arg) { return fabs(arg); }
 
-	/// Remainder of division.
-	/// **See also:** Documentation for [std::fmod](https://en.cppreference.com/w/cpp/numeric/math/fmod).
-	/// \param x first operand
-	/// \param y second operand
-	/// \return remainder of floating-point division.
-	/// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+	
+	
+	
+	
+	
+	
 	inline half fmod(half x, half y)
 	{
 		unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000;
@@ -2755,12 +2755,12 @@ namespace half_float
 		return half(detail::binary, sign|detail::mod<false,false>(absx, absy));
 	}
 
-	/// Remainder of division.
-	/// **See also:** Documentation for [std::remainder](https://en.cppreference.com/w/cpp/numeric/math/remainder).
-	/// \param x first operand
-	/// \param y second operand
-	/// \return remainder of floating-point division.
-	/// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+	
+	
+	
+	
+	
+	
 	inline half remainder(half x, half y)
 	{
 		unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000;
@@ -2774,13 +2774,13 @@ namespace half_float
 		return half(detail::binary, sign^detail::mod<false,true>(absx, absy));
 	}
 
-	/// Remainder of division.
-	/// **See also:** Documentation for [std::remquo](https://en.cppreference.com/w/cpp/numeric/math/remquo).
-	/// \param x first operand
-	/// \param y second operand
-	/// \param quo address to store some bits of quotient at
-	/// \return remainder of floating-point division.
-	/// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+	
+	
+	
+	
+	
+	
+	
 	inline half remquo(half x, half y, int *quo)
 	{
 		unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, value = x.data_ & 0x8000;
@@ -2796,16 +2796,16 @@ namespace half_float
 		return *quo = qsign ? -q : q, half(detail::binary, value);
 	}
 
-	/// Fused multiply add.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::fma](https://en.cppreference.com/w/cpp/numeric/math/fma).
-	/// \param x first operand
-	/// \param y second operand
-	/// \param z third operand
-	/// \return ( \a x * \a y ) + \a z rounded as one operation.
-	/// \exception FE_INVALID according to operator*() and operator+() unless any argument is a quiet NaN and no argument is a signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding the final addition
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half fma(half x, half y, half z)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -2869,38 +2869,38 @@ namespace half_float
 	#endif
 	}
 
-	/// Maximum of half expressions.
-	/// **See also:** Documentation for [std::fmax](https://en.cppreference.com/w/cpp/numeric/math/fmax).
-	/// \param x first operand
-	/// \param y second operand
-	/// \return maximum of operands, ignoring quiet NaNs
-	/// \exception FE_INVALID if \a x or \a y is signaling NaN
+	
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR_NOERR half fmax(half x, half y)
 	{
 		return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <
 			(y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_));
 	}
 
-	/// Minimum of half expressions.
-	/// **See also:** Documentation for [std::fmin](https://en.cppreference.com/w/cpp/numeric/math/fmin).
-	/// \param x first operand
-	/// \param y second operand
-	/// \return minimum of operands, ignoring quiet NaNs
-	/// \exception FE_INVALID if \a x or \a y is signaling NaN
+	
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR_NOERR half fmin(half x, half y)
 	{
 		return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) >
 			(y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_));
 	}
 
-	/// Positive difference.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::fdim](https://en.cppreference.com/w/cpp/numeric/math/fdim).
-	/// \param x first operand
-	/// \param y second operand
-	/// \return \a x - \a y or 0 if difference negative
-	/// \exception FE_... according to operator-(half,half)
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half fdim(half x, half y)
 	{
 		if(isnan(x) || isnan(y))
@@ -2908,10 +2908,10 @@ namespace half_float
 		return (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <= (y.data_^(0x8000|(0x8000-(y.data_>>15)))) ? half(detail::binary, 0) : (x-y);
 	}
 
-	/// Get NaN value.
-	/// **See also:** Documentation for [std::nan](https://en.cppreference.com/w/cpp/numeric/math/nan).
-	/// \param arg string code
-	/// \return quiet NaN
+	
+	
+	
+	
 	inline half nanh(const char *arg)
 	{
 		unsigned int value = 0x7FFF;
@@ -2920,19 +2920,19 @@ namespace half_float
 		return half(detail::binary, value);
 	}
 
-	/// \}
-	/// \anchor exponential
-	/// \name Exponential functions
-	/// \{
-
-	/// Exponential function.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::exp](https://en.cppreference.com/w/cpp/numeric/math/exp).
-	/// \param arg function argument
-	/// \return e raised to \a arg
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half exp(half arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -2960,14 +2960,14 @@ namespace half_float
 	#endif
 	}
 
-	/// Binary exponential.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::exp2](https://en.cppreference.com/w/cpp/numeric/math/exp2).
-	/// \param arg function argument
-	/// \return 2 raised to \a arg
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half exp2(half arg)
 	{
 	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
@@ -2985,15 +2985,15 @@ namespace half_float
 	#endif
 	}
 
-	/// Exponential minus one.
-	/// This function may be 1 ULP off the correctly rounded exact result in <0.05% of inputs for `std::round_to_nearest`
-	/// and in <1% of inputs for any other rounding mode.
-	///
-	/// **See also:** Documentation for [std::expm1](https://en.cppreference.com/w/cpp/numeric/math/expm1).
-	/// \param arg function argument
-	/// \return e raised to \a arg and subtracted by 1
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half expm1(half arg)
 	{
 	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
@@ -3038,15 +3038,15 @@ namespace half_float
 	#endif
 	}
 
-	/// Natural logarithm.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::log](https://en.cppreference.com/w/cpp/numeric/math/log).
-	/// \param arg function argument
-	/// \return logarithm of \a arg to base e
-	/// \exception FE_INVALID for signaling NaN or negative argument
-	/// \exception FE_DIVBYZERO for 0
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half log(half arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3066,15 +3066,15 @@ namespace half_float
 	#endif
 	}
 
-	/// Common logarithm.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::log10](https://en.cppreference.com/w/cpp/numeric/math/log10).
-	/// \param arg function argument
-	/// \return logarithm of \a arg to base 10
-	/// \exception FE_INVALID for signaling NaN or negative argument
-	/// \exception FE_DIVBYZERO for 0
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half log10(half arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3101,15 +3101,15 @@ namespace half_float
 	#endif
 	}
 
-	/// Binary logarithm.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::log2](https://en.cppreference.com/w/cpp/numeric/math/log2).
-	/// \param arg function argument
-	/// \return logarithm of \a arg to base 2
-	/// \exception FE_INVALID for signaling NaN or negative argument
-	/// \exception FE_DIVBYZERO for 0
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half log2(half arg)
 	{
 	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
@@ -3143,16 +3143,16 @@ namespace half_float
 	#endif
 	}
 
-	/// Natural logarithm plus one.
-	/// This function may be 1 ULP off the correctly rounded exact result in <0.05% of inputs for `std::round_to_nearest`
-	/// and in ~1% of inputs for any other rounding mode.
-	///
-	/// **See also:** Documentation for [std::log1p](https://en.cppreference.com/w/cpp/numeric/math/log1p).
-	/// \param arg function argument
-	/// \return logarithm of \a arg plus 1 to base e
-	/// \exception FE_INVALID for signaling NaN or argument <-1
-	/// \exception FE_DIVBYZERO for -1
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half log1p(half arg)
 	{
 	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
@@ -3190,19 +3190,19 @@ namespace half_float
 	#endif
 	}
 
-	/// \}
-	/// \anchor power
-	/// \name Power functions
-	/// \{
-
-	/// Square root.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::sqrt](https://en.cppreference.com/w/cpp/numeric/math/sqrt).
-	/// \param arg function argument
-	/// \return square root of \a arg
-	/// \exception FE_INVALID for signaling NaN and negative arguments
-	/// \exception FE_INEXACT according to rounding
+	
+	
+	
+	
+
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half sqrt(half arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3217,13 +3217,13 @@ namespace half_float
 	#endif
 	}
 
-	/// Inverse square root.
-	/// This function is exact to rounding for all rounding modes and thus generally more accurate than directly computing
-	/// 1 / sqrt(\a arg) in half-precision, in addition to also being faster.
-	/// \param arg function argument
-	/// \return reciprocal of square root of \a arg
-	/// \exception FE_INVALID for signaling NaN and negative arguments
-	/// \exception FE_INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
 	inline half rsqrt(half arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3253,14 +3253,14 @@ namespace half_float
 	#endif
 	}
 
-	/// Cubic root.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::cbrt](https://en.cppreference.com/w/cpp/numeric/math/cbrt).
-	/// \param arg function argument
-	/// \return cubic root of \a arg
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half cbrt(half arg)
 	{
 	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
@@ -3303,15 +3303,15 @@ namespace half_float
 	#endif
 	}
 
-	/// Hypotenuse function.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::hypot](https://en.cppreference.com/w/cpp/numeric/math/hypot).
-	/// \param x first argument
-	/// \param y second argument
-	/// \return square root of sum of squares without internal over- or underflows
-	/// \exception FE_INVALID if \a x or \a y is signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding of the final square root
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half hypot(half x, half y)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3348,16 +3348,16 @@ namespace half_float
 	#endif
 	}
 
-	/// Hypotenuse function.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::hypot](https://en.cppreference.com/w/cpp/numeric/math/hypot).
-	/// \param x first argument
-	/// \param y second argument
-	/// \param z third argument
-	/// \return square root of sum of squares without internal over- or underflows
-	/// \exception FE_INVALID if \a x, \a y or \a z is signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding of the final square root
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half hypot(half x, half y, half z)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3414,16 +3414,16 @@ namespace half_float
 	#endif
 	}
 
-	/// Power function.
-	/// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in ~0.00025% of inputs.
-	///
-	/// **See also:** Documentation for [std::pow](https://en.cppreference.com/w/cpp/numeric/math/pow).
-	/// \param x base
-	/// \param y exponent
-	/// \return \a x raised to \a y
-	/// \exception FE_INVALID if \a x or \a y is signaling NaN or if \a x is finite an negative and \a y is finite and not integral
-	/// \exception FE_DIVBYZERO if \a x is 0 and \a y is negative
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half pow(half x, half y)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3474,20 +3474,20 @@ namespace half_float
 	#endif
 	}
 
-	/// \}
-	/// \anchor trigonometric
-	/// \name Trigonometric functions
-	/// \{
-
-	/// Compute sine and cosine simultaneously.
-	///	This returns the same results as sin() and cos() but is faster than calling each function individually.
-	///
-	/// This function is exact to rounding for all rounding modes.
-	/// \param arg function argument
-	/// \param sin variable to take sine of \a arg
-	/// \param cos variable to take cosine of \a arg
-	/// \exception FE_INVALID for signaling NaN or infinity
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline void sincos(half arg, half *sin, half *cos)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3545,14 +3545,14 @@ namespace half_float
 	#endif
 	}
 
-	/// Sine function.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::sin](https://en.cppreference.com/w/cpp/numeric/math/sin).
-	/// \param arg function argument
-	/// \return sine value of \a arg
-	/// \exception FE_INVALID for signaling NaN or infinity
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half sin(half arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3578,14 +3578,14 @@ namespace half_float
 	#endif
 	}
 
-	/// Cosine function.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::cos](https://en.cppreference.com/w/cpp/numeric/math/cos).
-	/// \param arg function argument
-	/// \return cosine value of \a arg
-	/// \exception FE_INVALID for signaling NaN or infinity
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half cos(half arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3606,14 +3606,14 @@ namespace half_float
 	#endif
 	}
 
-	/// Tangent function.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::tan](https://en.cppreference.com/w/cpp/numeric/math/tan).
-	/// \param arg function argument
-	/// \return tangent value of \a arg
-	/// \exception FE_INVALID for signaling NaN or infinity
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half tan(half arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3643,14 +3643,14 @@ namespace half_float
 	#endif
 	}
 
-	/// Arc sine.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::asin](https://en.cppreference.com/w/cpp/numeric/math/asin).
-	/// \param arg function argument
-	/// \return arc sine value of \a arg
-	/// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half asin(half arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3672,14 +3672,14 @@ namespace half_float
 	#endif
 	}
 
-	/// Arc cosine function.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::acos](https://en.cppreference.com/w/cpp/numeric/math/acos).
-	/// \param arg function argument
-	/// \return arc cosine value of \a arg
-	/// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half acos(half arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3697,14 +3697,14 @@ namespace half_float
 	#endif
 	}
 
-	/// Arc tangent function.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::atan](https://en.cppreference.com/w/cpp/numeric/math/atan).
-	/// \param arg function argument
-	/// \return arc tangent value of \a arg
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half atan(half arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3725,16 +3725,16 @@ namespace half_float
 	#endif
 	}
 
-	/// Arc tangent function.
-	/// This function may be 1 ULP off the correctly rounded exact result in ~0.005% of inputs for `std::round_to_nearest`,
-	/// in ~0.1% of inputs for `std::round_toward_zero` and in ~0.02% of inputs for any other rounding mode.
-	///
-	/// **See also:** Documentation for [std::atan2](https://en.cppreference.com/w/cpp/numeric/math/atan2).
-	/// \param y numerator
-	/// \param x denominator
-	/// \return arc tangent value
-	/// \exception FE_INVALID if \a x or \a y is signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half atan2(half y, half x)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3777,19 +3777,19 @@ namespace half_float
 	#endif
 	}
 
-	/// \}
-	/// \anchor hyperbolic
-	/// \name Hyperbolic functions
-	/// \{
-
-	/// Hyperbolic sine.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::sinh](https://en.cppreference.com/w/cpp/numeric/math/sinh).
-	/// \param arg function argument
-	/// \return hyperbolic sine value of \a arg
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half sinh(half arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3810,14 +3810,14 @@ namespace half_float
 	#endif
 	}
 
-	/// Hyperbolic cosine.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::cosh](https://en.cppreference.com/w/cpp/numeric/math/cosh).
-	/// \param arg function argument
-	/// \return hyperbolic cosine value of \a arg
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half cosh(half arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3837,14 +3837,14 @@ namespace half_float
 	#endif
 	}
 
-	/// Hyperbolic tangent.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::tanh](https://en.cppreference.com/w/cpp/numeric/math/tanh).
-	/// \param arg function argument
-	/// \return hyperbolic tangent value of \a arg
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half tanh(half arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3869,14 +3869,14 @@ namespace half_float
 	#endif
 	}
 
-	/// Hyperbolic area sine.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::asinh](https://en.cppreference.com/w/cpp/numeric/math/asinh).
-	/// \param arg function argument
-	/// \return area sine value of \a arg
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half asinh(half arg)
 	{
 	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
@@ -3897,14 +3897,14 @@ namespace half_float
 	#endif
 	}
 
-	/// Hyperbolic area cosine.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::acosh](https://en.cppreference.com/w/cpp/numeric/math/acosh).
-	/// \param arg function argument
-	/// \return area cosine value of \a arg
-	/// \exception FE_INVALID for signaling NaN or arguments <1
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half acosh(half arg)
 	{
 	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
@@ -3921,15 +3921,15 @@ namespace half_float
 	#endif
 	}
 
-	/// Hyperbolic area tangent.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::atanh](https://en.cppreference.com/w/cpp/numeric/math/atanh).
-	/// \param arg function argument
-	/// \return area tangent value of \a arg
-	/// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1
-	/// \exception FE_DIVBYZERO for +/-1
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half atanh(half arg)
 	{
 	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
@@ -3950,19 +3950,19 @@ namespace half_float
 	#endif
 	}
 
-	/// \}
-	/// \anchor special
-	/// \name Error and gamma functions
-	/// \{
-
-	/// Error function.
-	/// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.5% of inputs.
-	///
-	/// **See also:** Documentation for [std::erf](https://en.cppreference.com/w/cpp/numeric/math/erf).
-	/// \param arg function argument
-	/// \return error function value of \a arg
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half erf(half arg)
 	{
 	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
@@ -3977,14 +3977,14 @@ namespace half_float
 	#endif
 	}
 
-	/// Complementary error function.
-	/// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.5% of inputs.
-	///
-	/// **See also:** Documentation for [std::erfc](https://en.cppreference.com/w/cpp/numeric/math/erfc).
-	/// \param arg function argument
-	/// \return 1 minus error function value of \a arg
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half erfc(half arg)
 	{
 	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
@@ -4001,15 +4001,15 @@ namespace half_float
 	#endif
 	}
 
-	/// Natural logarithm of gamma function.
-	/// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in ~0.025% of inputs.
-	///
-	/// **See also:** Documentation for [std::lgamma](https://en.cppreference.com/w/cpp/numeric/math/lgamma).
-	/// \param arg function argument
-	/// \return natural logarith of gamma function for \a arg
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_DIVBYZERO for 0 or negative integer arguments
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half lgamma(half arg)
 	{
 	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
@@ -4026,15 +4026,15 @@ namespace half_float
 	#endif
 	}
 
-	/// Gamma function.
-	/// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.25% of inputs.
-	///
-	/// **See also:** Documentation for [std::tgamma](https://en.cppreference.com/w/cpp/numeric/math/tgamma).
-	/// \param arg function argument
-	/// \return gamma function value of \a arg
-	/// \exception FE_INVALID for signaling NaN, negative infinity or negative integer arguments
-	/// \exception FE_DIVBYZERO for 0
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half tgamma(half arg)
 	{
 	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
@@ -4057,100 +4057,100 @@ namespace half_float
 	#endif
 	}
 
-	/// \}
-	/// \anchor rounding
-	/// \name Rounding
-	/// \{
-
-	/// Nearest integer not less than half value.
-	/// **See also:** Documentation for [std::ceil](https://en.cppreference.com/w/cpp/numeric/math/ceil).
-	/// \param arg half to round
-	/// \return nearest integer not less than \a arg
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_INEXACT if value had to be rounded
+	
+	
+	
+	
+
+	
+	
+	
+	
+	
+	
 	inline half ceil(half arg) { return half(detail::binary, detail::integral<std::round_toward_infinity,true,true>(arg.data_)); }
 
-	/// Nearest integer not greater than half value.
-	/// **See also:** Documentation for [std::floor](https://en.cppreference.com/w/cpp/numeric/math/floor).
-	/// \param arg half to round
-	/// \return nearest integer not greater than \a arg
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_INEXACT if value had to be rounded
+	
+	
+	
+	
+	
+	
 	inline half floor(half arg) { return half(detail::binary, detail::integral<std::round_toward_neg_infinity,true,true>(arg.data_)); }
 
-	/// Nearest integer not greater in magnitude than half value.
-	/// **See also:** Documentation for [std::trunc](https://en.cppreference.com/w/cpp/numeric/math/trunc).
-	/// \param arg half to round
-	/// \return nearest integer not greater in magnitude than \a arg
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_INEXACT if value had to be rounded
+	
+	
+	
+	
+	
+	
 	inline half trunc(half arg) { return half(detail::binary, detail::integral<std::round_toward_zero,true,true>(arg.data_)); }
 
-	/// Nearest integer.
-	/// **See also:** Documentation for [std::round](https://en.cppreference.com/w/cpp/numeric/math/round).
-	/// \param arg half to round
-	/// \return nearest integer, rounded away from zero in half-way cases
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_INEXACT if value had to be rounded
+	
+	
+	
+	
+	
+	
 	inline half round(half arg) { return half(detail::binary, detail::integral<std::round_to_nearest,false,true>(arg.data_)); }
 
-	/// Nearest integer.
-	/// **See also:** Documentation for [std::lround](https://en.cppreference.com/w/cpp/numeric/math/round).
-	/// \param arg half to round
-	/// \return nearest integer, rounded away from zero in half-way cases
-	/// \exception FE_INVALID if value is not representable as `long`
+	
+	
+	
+	
+	
 	inline long lround(half arg) { return detail::half2int<std::round_to_nearest,false,false,long>(arg.data_); }
 
-	/// Nearest integer using half's internal rounding mode.
-	/// **See also:** Documentation for [std::rint](https://en.cppreference.com/w/cpp/numeric/math/rint).
-	/// \param arg half expression to round
-	/// \return nearest integer using default rounding mode
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_INEXACT if value had to be rounded
+	
+	
+	
+	
+	
+	
 	inline half rint(half arg) { return half(detail::binary, detail::integral<half::round_style,true,true>(arg.data_)); }
 
-	/// Nearest integer using half's internal rounding mode.
-	/// **See also:** Documentation for [std::lrint](https://en.cppreference.com/w/cpp/numeric/math/rint).
-	/// \param arg half expression to round
-	/// \return nearest integer using default rounding mode
-	/// \exception FE_INVALID if value is not representable as `long`
-	/// \exception FE_INEXACT if value had to be rounded
+	
+	
+	
+	
+	
+	
 	inline long lrint(half arg) { return detail::half2int<half::round_style,true,true,long>(arg.data_); }
 
-	/// Nearest integer using half's internal rounding mode.
-	/// **See also:** Documentation for [std::nearbyint](https://en.cppreference.com/w/cpp/numeric/math/nearbyint).
-	/// \param arg half expression to round
-	/// \return nearest integer using default rounding mode
-	/// \exception FE_INVALID for signaling NaN
+	
+	
+	
+	
+	
 	inline half nearbyint(half arg) { return half(detail::binary, detail::integral<half::round_style,true,false>(arg.data_)); }
 #if HALF_ENABLE_CPP11_LONG_LONG
-	/// Nearest integer.
-	/// **See also:** Documentation for [std::llround](https://en.cppreference.com/w/cpp/numeric/math/round).
-	/// \param arg half to round
-	/// \return nearest integer, rounded away from zero in half-way cases
-	/// \exception FE_INVALID if value is not representable as `long long`
+	
+	
+	
+	
+	
 	inline long long llround(half arg) { return detail::half2int<std::round_to_nearest,false,false,long long>(arg.data_); }
 
-	/// Nearest integer using half's internal rounding mode.
-	/// **See also:** Documentation for [std::llrint](https://en.cppreference.com/w/cpp/numeric/math/rint).
-	/// \param arg half expression to round
-	/// \return nearest integer using default rounding mode
-	/// \exception FE_INVALID if value is not representable as `long long`
-	/// \exception FE_INEXACT if value had to be rounded
+	
+	
+	
+	
+	
+	
 	inline long long llrint(half arg) { return detail::half2int<half::round_style,true,true,long long>(arg.data_); }
 #endif
 
-	/// \}
-	/// \anchor float
-	/// \name Floating point manipulation
-	/// \{
-
-	/// Decompress floating-point number.
-	/// **See also:** Documentation for [std::frexp](https://en.cppreference.com/w/cpp/numeric/math/frexp).
-	/// \param arg number to decompress
-	/// \param exp address to store exponent at
-	/// \return significant in range [0.5, 1)
-	/// \exception FE_INVALID for signaling NaN
+	
+	
+	
+	
+
+	
+	
+	
+	
+	
+	
 	inline half frexp(half arg, int *exp)
 	{
 		*exp = 0;
@@ -4162,15 +4162,15 @@ namespace half_float
 		return half(detail::binary, (arg.data_&0x8000)|0x3800|(abs&0x3FF));
 	}
 
-	/// Multiply by power of two.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::scalbln](https://en.cppreference.com/w/cpp/numeric/math/scalbn).
-	/// \param arg number to modify
-	/// \param exp power of two to multiply with
-	/// \return \a arg multplied by 2 raised to \a exp
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half scalbln(half arg, long exp)
 	{
 		unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
@@ -4188,34 +4188,34 @@ namespace half_float
 		return half(detail::binary, detail::rounded<half::round_style,false>(sign|(m>>(1-exp)), (m>>-exp)&1, (m&((1<<-exp)-1))!=0));
 	}
 
-	/// Multiply by power of two.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::scalbn](https://en.cppreference.com/w/cpp/numeric/math/scalbn).
-	/// \param arg number to modify
-	/// \param exp power of two to multiply with
-	/// \return \a arg multplied by 2 raised to \a exp
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half scalbn(half arg, int exp) { return scalbln(arg, exp); }
 
-	/// Multiply by power of two.
-	/// This function is exact to rounding for all rounding modes.
-	///
-	/// **See also:** Documentation for [std::ldexp](https://en.cppreference.com/w/cpp/numeric/math/ldexp).
-	/// \param arg number to modify
-	/// \param exp power of two to multiply with
-	/// \return \a arg multplied by 2 raised to \a exp
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half ldexp(half arg, int exp) { return scalbln(arg, exp); }
 
-	/// Extract integer and fractional parts.
-	/// **See also:** Documentation for [std::modf](https://en.cppreference.com/w/cpp/numeric/math/modf).
-	/// \param arg number to decompress
-	/// \param iptr address to store integer part at
-	/// \return fractional part
-	/// \exception FE_INVALID for signaling NaN
+	
+	
+	
+	
+	
+	
 	inline half modf(half arg, half *iptr)
 	{
 		unsigned int abs = arg.data_ & 0x7FFF;
@@ -4236,14 +4236,14 @@ namespace half_float
 		return half(detail::binary, (arg.data_&0x8000)|(exp<<10)|(m&0x3FF));
 	}
 
-	/// Extract exponent.
-	/// **See also:** Documentation for [std::ilogb](https://en.cppreference.com/w/cpp/numeric/math/ilogb).
-	/// \param arg number to query
-	/// \return floating-point exponent
-	/// \retval FP_ILOGB0 for zero
-	/// \retval FP_ILOGBNAN for NaN
-	/// \retval INT_MAX for infinity
-	/// \exception FE_INVALID for 0 or infinite values
+	
+	
+	
+	
+	
+	
+	
+	
 	inline int ilogb(half arg)
 	{
 		int abs = arg.data_ & 0x7FFF, exp;
@@ -4256,12 +4256,12 @@ namespace half_float
 		return exp;
 	}
 
-	/// Extract exponent.
-	/// **See also:** Documentation for [std::logb](https://en.cppreference.com/w/cpp/numeric/math/logb).
-	/// \param arg number to query
-	/// \return floating-point exponent
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_DIVBYZERO for 0
+	
+	
+	
+	
+	
+	
 	inline half logb(half arg)
 	{
 		int abs = arg.data_ & 0x7FFF, exp;
@@ -4280,14 +4280,14 @@ namespace half_float
 		return half(detail::binary, value);
 	}
 
-	/// Next representable value.
-	/// **See also:** Documentation for [std::nextafter](https://en.cppreference.com/w/cpp/numeric/math/nextafter).
-	/// \param from value to compute next representable value for
-	/// \param to direction towards which to compute next value
-	/// \return next representable value after \a from in direction towards \a to
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_OVERFLOW for infinite result from finite argument
-	/// \exception FE_UNDERFLOW for subnormal result
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half nextafter(half from, half to)
 	{
 		int fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF;
@@ -4307,14 +4307,14 @@ namespace half_float
 		return half(detail::binary, out);
 	}
 
-	/// Next representable value.
-	/// **See also:** Documentation for [std::nexttoward](https://en.cppreference.com/w/cpp/numeric/math/nexttoward).
-	/// \param from value to compute next representable value for
-	/// \param to direction towards which to compute next value
-	/// \return next representable value after \a from in direction towards \a to
-	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_OVERFLOW for infinite result from finite argument
-	/// \exception FE_UNDERFLOW for subnormal result
+	
+	
+	
+	
+	
+	
+	
+	
 	inline half nexttoward(half from, long double to)
 	{
 		int fabs = from.data_ & 0x7FFF;
@@ -4334,26 +4334,26 @@ namespace half_float
 		return half(detail::binary, out);
 	}
 
-	/// Take sign.
-	/// **See also:** Documentation for [std::copysign](https://en.cppreference.com/w/cpp/numeric/math/copysign).
-	/// \param x value to change sign for
-	/// \param y value to take sign from
-	/// \return value equal to \a x in magnitude and to \a y in sign
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR half copysign(half x, half y) { return half(detail::binary, x.data_^((x.data_^y.data_)&0x8000)); }
 
-	/// \}
-	/// \anchor classification
-	/// \name Floating point classification
-	/// \{
-
-	/// Classify floating-point value.
-	/// **See also:** Documentation for [std::fpclassify](https://en.cppreference.com/w/cpp/numeric/math/fpclassify).
-	/// \param arg number to classify
-	/// \retval FP_ZERO for positive and negative zero
-	/// \retval FP_SUBNORMAL for subnormal numbers
-	/// \retval FP_INFINITY for positive and negative infinity
-	/// \retval FP_NAN for NaNs
-	/// \retval FP_NORMAL for all other (normal) values
+	
+	
+	
+	
+
+	
+	
+	
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR int fpclassify(half arg)
 	{
 		return	!(arg.data_&0x7FFF) ? FP_ZERO :
@@ -4363,212 +4363,212 @@ namespace half_float
 				FP_NAN;
 	}
 
-	/// Check if finite number.
-	/// **See also:** Documentation for [std::isfinite](https://en.cppreference.com/w/cpp/numeric/math/isfinite).
-	/// \param arg number to check
-	/// \retval true if neither infinity nor NaN
-	/// \retval false else
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR bool isfinite(half arg) { return (arg.data_&0x7C00) != 0x7C00; }
 
-	/// Check for infinity.
-	/// **See also:** Documentation for [std::isinf](https://en.cppreference.com/w/cpp/numeric/math/isinf).
-	/// \param arg number to check
-	/// \retval true for positive or negative infinity
-	/// \retval false else
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR bool isinf(half arg) { return (arg.data_&0x7FFF) == 0x7C00; }
 
-	/// Check for NaN.
-	/// **See also:** Documentation for [std::isnan](https://en.cppreference.com/w/cpp/numeric/math/isnan).
-	/// \param arg number to check
-	/// \retval true for NaNs
-	/// \retval false else
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR bool isnan(half arg) { return (arg.data_&0x7FFF) > 0x7C00; }
 
-	/// Check if normal number.
-	/// **See also:** Documentation for [std::isnormal](https://en.cppreference.com/w/cpp/numeric/math/isnormal).
-	/// \param arg number to check
-	/// \retval true if normal number
-	/// \retval false if either subnormal, zero, infinity or NaN
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR bool isnormal(half arg) { return ((arg.data_&0x7C00)!=0) & ((arg.data_&0x7C00)!=0x7C00); }
 
-	/// Check sign.
-	/// **See also:** Documentation for [std::signbit](https://en.cppreference.com/w/cpp/numeric/math/signbit).
-	/// \param arg number to check
-	/// \retval true for negative number
-	/// \retval false for positive number
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR bool signbit(half arg) { return (arg.data_&0x8000) != 0; }
 
-	/// \}
-	/// \anchor compfunc
-	/// \name Comparison
-	/// \{
-
-	/// Quiet comparison for greater than.
-	/// **See also:** Documentation for [std::isgreater](https://en.cppreference.com/w/cpp/numeric/math/isgreater).
-	/// \param x first operand
-	/// \param y second operand
-	/// \retval true if \a x greater than \a y
-	/// \retval false else
+	
+	
+	
+	
+
+	
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR bool isgreater(half x, half y)
 	{
 		return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
 	}
 
-	/// Quiet comparison for greater equal.
-	/// **See also:** Documentation for [std::isgreaterequal](https://en.cppreference.com/w/cpp/numeric/math/isgreaterequal).
-	/// \param x first operand
-	/// \param y second operand
-	/// \retval true if \a x greater equal \a y
-	/// \retval false else
+	
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR bool isgreaterequal(half x, half y)
 	{
 		return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
 	}
 
-	/// Quiet comparison for less than.
-	/// **See also:** Documentation for [std::isless](https://en.cppreference.com/w/cpp/numeric/math/isless).
-	/// \param x first operand
-	/// \param y second operand
-	/// \retval true if \a x less than \a y
-	/// \retval false else
+	
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR bool isless(half x, half y)
 	{
 		return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
 	}
 
-	/// Quiet comparison for less equal.
-	/// **See also:** Documentation for [std::islessequal](https://en.cppreference.com/w/cpp/numeric/math/islessequal).
-	/// \param x first operand
-	/// \param y second operand
-	/// \retval true if \a x less equal \a y
-	/// \retval false else
+	
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR bool islessequal(half x, half y)
 	{
 		return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
 	}
 
-	/// Quiet comarison for less or greater.
-	/// **See also:** Documentation for [std::islessgreater](https://en.cppreference.com/w/cpp/numeric/math/islessgreater).
-	/// \param x first operand
-	/// \param y second operand
-	/// \retval true if either less or greater
-	/// \retval false else
+	
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR bool islessgreater(half x, half y)
 	{
 		return x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF) && !isnan(x) && !isnan(y);
 	}
 
-	/// Quiet check if unordered.
-	/// **See also:** Documentation for [std::isunordered](https://en.cppreference.com/w/cpp/numeric/math/isunordered).
-	/// \param x first operand
-	/// \param y second operand
-	/// \retval true if unordered (one or two NaN operands)
-	/// \retval false else
+	
+	
+	
+	
+	
+	
 	inline HALF_CONSTEXPR bool isunordered(half x, half y) { return isnan(x) || isnan(y); }
 
-	/// \}
-	/// \anchor casting
-	/// \name Casting
-	/// \{
-
-	/// Cast to or from half-precision floating-point number.
-	/// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted
-	/// directly using the default rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do.
-	///
-	/// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types
-	/// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler
-	/// error and casting between [half](\ref half_float::half)s returns the argument unmodified.
-	/// \tparam T destination type (half or built-in arithmetic type)
-	/// \tparam U source type (half or built-in arithmetic type)
-	/// \param arg value to cast
-	/// \return \a arg converted to destination type
-	/// \exception FE_INVALID if \a T is integer type and result is not representable as \a T
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	template<typename T,typename U> T half_cast(U arg) { return detail::half_caster<T,U>::cast(arg); }
 
-	/// Cast to or from half-precision floating-point number.
-	/// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted
-	/// directly using the specified rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do.
-	///
-	/// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types
-	/// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler
-	/// error and casting between [half](\ref half_float::half)s returns the argument unmodified.
-	/// \tparam T destination type (half or built-in arithmetic type)
-	/// \tparam R rounding mode to use.
-	/// \tparam U source type (half or built-in arithmetic type)
-	/// \param arg value to cast
-	/// \return \a arg converted to destination type
-	/// \exception FE_INVALID if \a T is integer type and result is not representable as \a T
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	template<typename T,std::float_round_style R,typename U> T half_cast(U arg) { return detail::half_caster<T,U,R>::cast(arg); }
-	/// \}
-
-	/// \}
-	/// \anchor errors
-	/// \name Error handling
-	/// \{
-
-	/// Clear exception flags.
-	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled,
-	/// but in that case manual flag management is the only way to raise flags.
-	///
-	/// **See also:** Documentation for [std::feclearexcept](https://en.cppreference.com/w/cpp/numeric/fenv/feclearexcept).
-	/// \param excepts OR of exceptions to clear
-	/// \retval 0 all selected flags cleared successfully
+	
+
+	
+	
+	
+	
+
+	
+	
+	
+	
+	
+	
+	
 	inline int feclearexcept(int excepts) { detail::errflags() &= ~excepts; return 0; }
 
-	/// Test exception flags.
-	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled,
-	/// but in that case manual flag management is the only way to raise flags.
-	///
-	/// **See also:** Documentation for [std::fetestexcept](https://en.cppreference.com/w/cpp/numeric/fenv/fetestexcept).
-	/// \param excepts OR of exceptions to test
-	/// \return OR of selected exceptions if raised
+	
+	
+	
+	
+	
+	
+	
 	inline int fetestexcept(int excepts) { return detail::errflags() & excepts; }
 
-	/// Raise exception flags.
-	/// This raises the specified floating point exceptions and also invokes any additional automatic exception handling as
-	/// configured with the [HALF_ERRHANDLIG_...](\ref HALF_ERRHANDLING_ERRNO) preprocessor symbols.
-	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled,
-	/// but in that case manual flag management is the only way to raise flags.
-	///
-	/// **See also:** Documentation for [std::feraiseexcept](https://en.cppreference.com/w/cpp/numeric/fenv/feraiseexcept).
-	/// \param excepts OR of exceptions to raise
-	/// \retval 0 all selected exceptions raised successfully
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline int feraiseexcept(int excepts) { detail::errflags() |= excepts; detail::raise(excepts); return 0; }
 
-	/// Save exception flags.
-	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled,
-	/// but in that case manual flag management is the only way to raise flags.
-	///
-	/// **See also:** Documentation for [std::fegetexceptflag](https://en.cppreference.com/w/cpp/numeric/fenv/feexceptflag).
-	/// \param flagp adress to store flag state at
-	/// \param excepts OR of flags to save
-	/// \retval 0 for success
+	
+	
+	
+	
+	
+	
+	
+	
 	inline int fegetexceptflag(int *flagp, int excepts) { *flagp = detail::errflags() & excepts; return 0; }
 
-	/// Restore exception flags.
-	/// This only copies the specified exception state (including unset flags) without incurring any additional exception handling.
-	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled,
-	/// but in that case manual flag management is the only way to raise flags.
-	///
-	/// **See also:** Documentation for [std::fesetexceptflag](https://en.cppreference.com/w/cpp/numeric/fenv/feexceptflag).
-	/// \param flagp adress to take flag state from
-	/// \param excepts OR of flags to restore
-	/// \retval 0 for success
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline int fesetexceptflag(const int *flagp, int excepts) { detail::errflags() = (detail::errflags()|(*flagp&excepts)) & (*flagp|~excepts); return 0; }
 
-	/// Throw C++ exceptions based on set exception flags.
-	/// This function manually throws a corresponding C++ exception if one of the specified flags is set,
-	/// no matter if automatic throwing (via [HALF_ERRHANDLING_THROW_...](\ref HALF_ERRHANDLING_THROW_INVALID)) is enabled or not.
-	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled,
-	/// but in that case manual flag management is the only way to raise flags.
-	/// \param excepts OR of exceptions to test
-	/// \param msg error message to use for exception description
-	/// \throw std::domain_error if `FE_INVALID` or `FE_DIVBYZERO` is selected and set
-	/// \throw std::overflow_error if `FE_OVERFLOW` is selected and set
-	/// \throw std::underflow_error if `FE_UNDERFLOW` is selected and set
-	/// \throw std::range_error if `FE_INEXACT` is selected and set
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
 	inline void fethrowexcept(int excepts, const char *msg = "")
 	{
 		excepts &= detail::errflags();
@@ -4581,7 +4581,7 @@ namespace half_float
 		if(excepts & FE_INEXACT)
 			throw std::range_error(msg);
 	}
-	/// \}
+	
 }
 
 
diff --git a/python/src/ngtpy.cpp b/python/src/ngtpy.cpp
index d320c5f..b5fbaa3 100644
--- a/python/src/ngtpy.cpp
+++ b/python/src/ngtpy.cpp
@@ -71,15 +71,18 @@ class BatchResults {
   }
   py::array_t<int> getIDs() {
     convert();
-    if (size == 0 || resultList[0].size() == 0) {
-      NGTThrowException("ngtpy::BatchResults::get: empty.");
+    if (size == 0 || resultList.size() == 0) {
+      std::stringstream msg;
+      msg << "ngtpy::BatchResults::getIDs: empty. " << size << ":" << resultList.size();
+      NGTThrowException(msg);
     }
     size_t nobjects = resultList[0].size();
     py::array_t<uint32_t> r({size, nobjects});
     auto wr = r.mutable_unchecked<2>();
     for (size_t idx = 0; idx < size; idx++) {
       if (resultList[idx].size() != nobjects) {
-	NGTThrowException("ngtpy::BatchResults::get: not knn results.");
+	std::cerr << "ngtpy::BatchResults::getIDs: not knn results. " << resultList[idx].size()
+		  << ":" << nobjects << std::endl;
       }
       for (auto ri = resultList[idx].begin(); ri != resultList[idx].end(); ++ri) {
 	wr(idx, std::distance(resultList[idx].begin(), ri)) = (*ri).id - 1;
@@ -590,7 +593,7 @@ class QuantizedIndex : public NGTQG::Index {
    size_t size, 		// the number of resultant objects
    float epsilon, 		// search parameter epsilon. the adequate range is from 0.0 to 0.05.
    float resultExpansion,	// the number of inner resultant objects
-   int edgeSize		// the number of used edges for each node during the exploration of the graph.
+   int edgeSize			// the number of used edges for each node during the exploration of the graph.
   ) {
     py::array_t<float> qobject(query);
     py::buffer_info qinfo = qobject.request();
@@ -602,7 +605,7 @@ class QuantizedIndex : public NGTQG::Index {
       resultExpansion	= resultExpansion >= 0.0 ? resultExpansion : defaultResultExpansion;
       edgeSize		= edgeSize >= -2 ? edgeSize : defaultEdgeSize;
       sc.setSize(size);				// the number of resulting objects.
-      sc.setRadius(defaultRadius);			// the radius of search.
+      sc.setRadius(defaultRadius);		// the radius of search.
       sc.setEpsilon(epsilon);			// set exploration coefficient.
       sc.setResultExpansion(resultExpansion);	// set result expansion.
       sc.setEdgeSize(edgeSize);			// if maxEdge is minus, the specified value in advance is used.
@@ -628,7 +631,6 @@ class QuantizedIndex : public NGTQG::Index {
 	    r.pop();
 	  }
 	}
-
 	return ids;
       }
       py::list results;
@@ -690,8 +692,9 @@ class QuantizedBlobIndex : public QBG::Index {
    bool   zeroBasedNumbering,		// object ID numbering.
    bool   treeDisabled,			// not use the tree index.
    bool   logDisabled,			// stderr log is disabled.
-   bool   readOnly			// open mode
-  ):QBG::Index(path, readOnly) {
+   bool   readOnly,			// open mode.
+   const  std::string refinementObjectTypeString	// object type for distance refinement.
+  ):QBG::Index(path, readOnly, !logDisabled, refinementObjectType(refinementObjectTypeString)) {
     zeroNumbering = zeroBasedNumbering;
     numOfDistanceComputations = 0;
     treeIndex = !treeDisabled;
@@ -706,6 +709,28 @@ class QuantizedBlobIndex : public QBG::Index {
     defaultNumOfProbes = 0;
   }
 
+  // Maps the object type name given to the constructor onto the NGTQ data type that is
+  // passed to QBG::Index for distance refinement. Throws for any name not listed below.
+  static NGTQ::DataType refinementObjectType(const std::string &type) {
+    if (type == "Float" || type == "float") {
+      return NGTQ::DataTypeFloat;
+    } else if (type == "Byte" || type == "byte") {
+      return NGTQ::DataTypeUint8;
+#ifdef NGT_HALF_FLOAT
+    } else if (type == "Float16" || type == "float16") {
+      return NGTQ::DataTypeFloat16;
+#endif
+    } else if (type == "Any" || type == "any") {
+      return NGTQ::DataTypeAny;
+    } else if (type == "None" || type == "none") {
+      return NGTQ::DataTypeNone;
+    }
+    // none of the accepted names matched: report the rejected name itself.
+    std::stringstream msg;
+    msg << "ngtpy::create: invalid object type. " << type;
+    NGTThrowException(msg);
+  }
+
   void batchInsert(
    py::array_t<double> objects,
    bool debug = false
@@ -737,7 +762,7 @@ class QuantizedBlobIndex : public QBG::Index {
 
   py::array_t<uint32_t> batchSearchTmp(
    py::array_t<float> queries,
-   size_t size
+   size_t size	 		
   ) {
     const py::buffer_info &qinfo = queries.request();
     const std::vector<long int> &qshape = qinfo.shape;
@@ -776,7 +801,7 @@ class QuantizedBlobIndex : public QBG::Index {
     return results;
   }
 
-  void batchSearchInTwoSteps(
+  void parallelSearchInTwoSteps(
     py::array_t<float> queries,
     BatchResults &results,
     size_t size
@@ -785,7 +810,8 @@ class QuantizedBlobIndex : public QBG::Index {
     const std::vector<long int> &qshape = qinfo.shape;
     auto nOfQueries = qshape[0];
     size_t dimension = qshape[1];
-    size_t psedoDimension = QBG::Index::getQuantizer().globalCodebookIndex.getObjectSpace().getPaddedDimension();
+    //size_t psedoDimension = QBG::Index::getQuantizer().globalCodebookIndex.getObjectSpace().getPaddedDimension();
+    auto pseudoDimension = QBG::Index::getQuantizer().property.dimension;
     auto *queryPtr = static_cast<float*>(qinfo.ptr);
 
     size	= size > 0 ? size : defaultNumOfSearchObjects;
@@ -794,14 +820,22 @@ class QuantizedBlobIndex : public QBG::Index {
     results.resultList.clear();
     results.results.resize(nOfQueries);
 
+    auto resultExpansion = defaultResultExpansion;
+    size_t exactResultSize = 0;
+    if (resultExpansion >= 1.0) {
+      exactResultSize = size;
+      size = static_cast<float>(size) * resultExpansion;
+    }
+
 #pragma omp parallel for schedule(dynamic)
     for (int idx = 0; idx < nOfQueries; idx++) {
       float *qptr = queryPtr + idx * dimension;
-      vector<float> query(psedoDimension, 0);
+      vector<float> query(pseudoDimension, 0);
       memcpy(query.data(), qptr, dimension * sizeof(float));
       QBG::SearchContainer sc;
       sc.setObjectVector(query);
       sc.setSize(size);
+      sc.setExactResultSize(exactResultSize);
       sc.setEpsilon(defaultEpsilon);
       sc.setBlobEpsilon(defaultBlobEpsilon);
       sc.setEdgeSize(defaultEdgeSize);
@@ -816,7 +850,50 @@ class QuantizedBlobIndex : public QBG::Index {
     return;
   }
 
-  void batchSearchInOneStep(
+  // Searches all of the queries at once with QBG::Index::searchInTwoSteps(). Each query row is
+  // copied into a zero-padded row of pseudoDimension floats that is handed to the batch container.
+  void batchSearchInTwoSteps(
+    py::array_t<float> queries,	// query vectors. shape[0] is taken as the number of queries.
+    BatchResults &results,	// output. its results and resultList are cleared first.
+    size_t size			// the number of resultant objects. 0 selects the default.
+  ) {
+    const py::buffer_info &qinfo = queries.request();
+    const std::vector<long int> &qshape = qinfo.shape;
+    auto nOfQueries = qshape[0];
+    size_t dimension = qshape[1];
+    auto pseudoDimension = QBG::Index::getQuantizer().property.dimension;
+    auto *queryPtr = static_cast<float*>(qinfo.ptr);
+
+    size	= size > 0 ? size : defaultNumOfSearchObjects;
+
+    results.results.clear();
+    results.resultList.clear();
+
+    std::unique_ptr<float[]> qs(new float[static_cast<size_t>(nOfQueries) * pseudoDimension]);	// one padded row per query.
+#pragma omp parallel for
+    for (int idx = 0; idx < nOfQueries; idx++) {
+      float *qptr = queryPtr + idx * dimension;
+      float *qsptr = &qs[idx * pseudoDimension];
+      memset(qsptr + dimension, 0, sizeof(float) * (pseudoDimension - dimension));	// NOTE(review): assumes dimension <= pseudoDimension.
+      memcpy(qsptr, qptr, dimension * sizeof(float));
+    }
+    QBG::BatchSearchContainer sc;
+    sc.setObjectVectors(&qs[0], nOfQueries, pseudoDimension);
+    sc.setSize(size);
+    sc.setRefinementExpansion(defaultResultExpansion);
+    sc.setEpsilon(defaultEpsilon);
+    sc.setBlobEpsilon(defaultBlobEpsilon);
+    sc.setEdgeSize(defaultEdgeSize);
+    sc.setNumOfProbes(defaultNumOfProbes);
+#ifdef NGTQBG_FUNCTION_SELECTOR
+    sc.functionSelector = defaultFunctionSelector;
+#endif
+    QBG::Index::searchInTwoSteps(sc);
+    results.resultList = std::move(sc.getBatchResult());
+    results.size = results.resultList.size();
+  }
+
+  void parallelSearchInOneStep(
     py::array_t<float> queries,
     BatchResults &results,
     size_t size
@@ -825,7 +902,7 @@ class QuantizedBlobIndex : public QBG::Index {
     const std::vector<long int> &qshape = qinfo.shape;
     auto nOfQueries = qshape[0];
     size_t dimension = qshape[1];
-    size_t psedoDimension = QBG::Index::getQuantizer().globalCodebookIndex.getObjectSpace().getPaddedDimension();
+    size_t pseudoDimension = QBG::Index::getQuantizer().globalCodebookIndex.getObjectSpace().getPaddedDimension();
     auto *queryPtr = static_cast<float*>(qinfo.ptr);
 
     size	= size > 0 ? size : defaultNumOfSearchObjects;
@@ -834,21 +911,20 @@ class QuantizedBlobIndex : public QBG::Index {
     results.resultList.clear();
     results.results.resize(nOfQueries);
 
-    size_t searchSize = size;
-    size_t searchExactResultSize = 0;
-    if (defaultExactResultExpansion >= 1.0) {
-      searchSize = static_cast<float>(size) * defaultExactResultExpansion;
-      searchExactResultSize = size;
+    size_t exactResultSize = 0;			// stays 0 unless the result expansion is enabled.
+    if (defaultResultExpansion >= 1.0) {
+      exactResultSize = size;			// the requested number, captured before size is expanded.
+      size = static_cast<float>(size) * defaultResultExpansion;
     }
 #pragma omp parallel for schedule(dynamic)
     for (int idx = 0; idx < nOfQueries; idx++) {
       float *qptr = queryPtr + idx * dimension;
-      vector<float> query(psedoDimension, 0);
+      vector<float> query(pseudoDimension, 0);
       memcpy(query.data(), qptr, dimension * sizeof(float));
       QBG::SearchContainer sc;
       sc.setObjectVector(query);
-      sc.setSize(searchSize);
-      sc.setExactResultSize(searchExactResultSize);
+      sc.setSize(size);
+      sc.setExactResultSize(exactResultSize);
       sc.setEpsilon(defaultEpsilon);
       sc.setBlobEpsilon(defaultBlobEpsilon);
       sc.setEdgeSize(defaultEdgeSize);
@@ -873,7 +949,7 @@ class QuantizedBlobIndex : public QBG::Index {
       NGTThrowException(msg);
     }
     if (defaultNumOfProbes == 0) {
-      batchSearchInOneStep(queries, results, size);
+      parallelSearchInOneStep(queries, results, size);
     } else {
       batchSearchInTwoSteps(queries, results, size);
     }
@@ -895,7 +971,7 @@ class QuantizedBlobIndex : public QBG::Index {
     const std::vector<long int> &qshape = qinfo.shape;
     auto nOfQueries = qshape[0];
     size_t dimension = qshape[1];
-    size_t psedoDimension = QBG::Index::getQuantizer().globalCodebookIndex.getObjectSpace().getPaddedDimension();
+    size_t pseudoDimension = QBG::Index::getQuantizer().globalCodebookIndex.getObjectSpace().getPaddedDimension();
     auto *queryPtr = static_cast<float*>(qinfo.ptr);
     radius = radius >= 0 ? radius : defaultRadius;
     radius = sqrt(radius);
@@ -907,7 +983,7 @@ class QuantizedBlobIndex : public QBG::Index {
 #pragma omp parallel for schedule(dynamic)
     for (int idx = 0; idx < nOfQueries; idx++) {
       float *qptr = queryPtr + idx * dimension;
-      vector<float> query(psedoDimension, 0);
+      vector<float> query(pseudoDimension, 0);
       memcpy(query.data(), qptr, dimension * sizeof(float));
       QBG::SearchContainer sc;
       sc.setObjectVector(query);
@@ -937,12 +1013,18 @@ class QuantizedBlobIndex : public QBG::Index {
       sc.setObjectVector(qvector);
       size		= size > 0 ? size : defaultNumOfSearchObjects;
       epsilon		= epsilon > -1.0 ? epsilon : defaultEpsilon;
+#if 0
       if (defaultExactResultExpansion >= 1.0) {
-	sc.setSize(static_cast<float>(size) * defaultExactResultExpansion);
-	sc.setExactResultSize(size);
+       sc.setSize(static_cast<float>(size) * defaultExactResultExpansion);
+       sc.setExactResultSize(size);
       } else {
-	sc.setSize(size);				// the number of resulting objects.
+       sc.setSize(size);                               // the number of resulting objects.
       }
+#else
+      sc.setSize(size);
+      //std::cerr << "pass defaultResultExpansion=" << defaultResultExpansion << std::endl;
+      sc.setRefinementExpansion(defaultResultExpansion);
+#endif
       sc.setEpsilon(epsilon);			// set exploration coefficient.
       sc.setBlobEpsilon(defaultBlobEpsilon);
       sc.setEdgeSize(defaultEdgeSize);
@@ -973,7 +1055,6 @@ class QuantizedBlobIndex : public QBG::Index {
 	    r.pop();
 	  }
 	}
-
 	return ids;
       }
       py::list results;
@@ -1201,13 +1282,14 @@ PYBIND11_MODULE(ngtpy, m) {
 
 
     py::class_<QuantizedBlobIndex>(m, "QuantizedBlobIndex")
-      .def(py::init<const std::string &, size_t, bool, bool, bool, bool>(),
+      .def(py::init<const std::string &, size_t, bool, bool, bool, bool, const std::string &>(),
            py::arg("path"),
 	   py::arg("max_no_of_edges") = 128,
            py::arg("zero_based_numbering") = true,
 	   py::arg("tree_disabled") = false,
-           py::arg("log_disabled") = false,
-           py::arg("read_only") = true)
+           py::arg("log_disabled") = true,
+           py::arg("read_only") = true,
+	   py::arg("refinement_object_type") = "Any")
       .def("save", (void (QBG::Index::*)()) &QBG::Index::save)
       .def("batch_insert", &::QuantizedBlobIndex::batchInsert,
            py::arg("objects"),
diff --git a/samples/qbg-capi/qbg-capi.cpp b/samples/qbg-capi/qbg-capi.cpp
index 3610bba..4f1db87 100644
--- a/samples/qbg-capi/qbg-capi.cpp
+++ b/samples/qbg-capi/qbg-capi.cpp
@@ -85,7 +85,7 @@ main(int argc, char **argv)
   std::cerr << "building the index..." << std::endl;
   QBGBuildParameters buildParameters;
   qbg_initialize_build_parameters(&buildParameters);
-  buildParameters.number_of_objects = 500;
+  buildParameters.number_of_objects = 500;		
   auto status = qbg_build_index(indexPath.c_str(), &buildParameters, err);
   if (!status) {
     std::cerr << "Cannot build. " << ngt_get_error_string(err) << std::endl;