diff --git a/pom.xml b/pom.xml
index ecb15d4b84..2dead29b50 100644
--- a/pom.xml
+++ b/pom.xml
@@ -105,10 +105,18 @@
               <mainClass>io.anserini.index.IndexCollection</mainClass>
               <id>IndexCollection</id>
             </program>
+            <program>
+              <mainClass>io.anserini.index.IndexVectorCollection</mainClass>
+              <id>IndexVectorCollection</id>
+            </program>
             <program>
               <mainClass>io.anserini.search.SearchCollection</mainClass>
               <id>SearchCollection</id>
             </program>
+            <program>
+              <mainClass>io.anserini.search.SearchVectorCollection</mainClass>
+              <id>SearchVectorCollection</id>
+            </program>
             <program>
               <mainClass>io.anserini.search.SearchMsmarco</mainClass>
               <id>SearchMsmarco</id>
diff --git a/src/main/java/io/anserini/collection/VectorCollection.java b/src/main/java/io/anserini/collection/VectorCollection.java
new file mode 100644
index 0000000000..30ebbb74cf
--- /dev/null
+++ b/src/main/java/io/anserini/collection/VectorCollection.java
@@ -0,0 +1,93 @@
+/*
+ * Anserini: A Lucene toolkit for reproducible information retrieval research
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.anserini.collection;
+
+import com.fasterxml.jackson.databind.JsonNode;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.HashMap;
+import java.util.Map;
+
+
+/**
+ * A document collection for encoded dense vectors for ANN (HNSW) search.
+ * The "vector" field is copied into the "contents" field for indexing.
+ */
+public class VectorCollection extends DocumentCollection<VectorCollection.Document> {
+  public VectorCollection(Path path) {
+    this.path = path;
+  }
+
+  @Override
+  public FileSegment<VectorCollection.Document> createFileSegment(Path p) throws IOException {
+    return new VectorCollection.Segment<>(p);
+  }
+
+  public static class Segment<T extends Document> extends JsonCollection.Segment<T> {
+    public Segment(Path path) throws IOException {
+      super(path);
+    }
+
+    @Override
+    protected Document createNewDocument(JsonNode json) {
+      return new Document(json);
+    }
+  }
+
+  public static class Document extends JsonCollection.Document {
+    private final String id;
+    private final String contents;
+    private final String raw;
+    private Map<String, String> fields;
+
+    public Document(JsonNode json) {
+      super();
+      this.raw = json.toPrettyString();
+      this.id = json.get("docid").asText();
+      this.contents = json.get("vector").toString();
+      // We're not going to index any other fields, so just initialize an empty map.
+      this.fields = new HashMap<>();
+    }
+
+    @Override
+    public String id() {
+      if (id == null) {
+        throw new RuntimeException("JSON document has no \"docid\" field!");
+      }
+      return id;
+    }
+
+    @Override
+    public String contents() {
+      if (contents == null) {
+        throw new RuntimeException("JSON document has no contents that could be parsed!");
+      }
+      return contents;
+    }
+
+    @Override
+    public String raw() {
+      return raw;
+    }
+
+    @Override
+    public Map<String, String> fields() {
+      return fields;
+    }
+  }
+}
diff --git a/src/main/java/io/anserini/index/IndexArgs.java b/src/main/java/io/anserini/index/IndexArgs.java
index 70957978da..4ae249c967 100644
--- a/src/main/java/io/anserini/index/IndexArgs.java
+++ b/src/main/java/io/anserini/index/IndexArgs.java
@@ -36,9 +36,12 @@ public class IndexArgs {
   // This is the name of the field in the Lucene document where the entity document is stored.
public static final String ENTITY = "entity"; + // This is the name of the field in the Lucene document where the vector document is stored. + public static final String VECTOR = "vector"; private static final int TIMEOUT = 600 * 1000; + // required arguments @Option(name = "-input", metaVar = "[path]", required = true, diff --git a/src/main/java/io/anserini/index/IndexVectorArgs.java b/src/main/java/io/anserini/index/IndexVectorArgs.java new file mode 100644 index 0000000000..96a247e617 --- /dev/null +++ b/src/main/java/io/anserini/index/IndexVectorArgs.java @@ -0,0 +1,120 @@ +/* + * Anserini: A Lucene toolkit for reproducible information retrieval research + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.anserini.index; + +import org.kohsuke.args4j.Option; +import org.kohsuke.args4j.spi.StringArrayOptionHandler; + + +public class IndexVectorArgs { + + // This is the name of the field in the Lucene document where the docid is stored. + public static final String ID = "id"; + + // This is the name of the field in the Lucene document that should be searched by default. + public static final String CONTENTS = "contents"; + + // This is the name of the field in the Lucene document where the raw document is stored. + public static final String RAW = "raw"; + + // This is the name of the field in the Lucene document where the vector document is stored. 
+ public static final String VECTOR = "vector"; + + private static final int TIMEOUT = 600 * 1000; + + + // required arguments + + @Option(name = "-input", metaVar = "[path]", required = true, + usage = "Location of input collection.") + public String input; + + @Option(name = "-threads", metaVar = "[num]", required = true, + usage = "Number of indexing threads.") + public int threads; + + @Option(name = "-collection", metaVar = "[class]", required = true, + usage = "Collection class in package 'io.anserini.collection'.") + public String collectionClass; + + @Option(name = "-generator", metaVar = "[class]", + usage = "Document generator class in package 'io.anserini.index.generator'.") + public String generatorClass = "DefaultLuceneDocumentGenerator"; + + // optional general arguments + + @Option(name = "-verbose", forbids = {"-quiet"}, + usage = "Enables verbose logging for each indexing thread; can be noisy if collection has many small file segments.") + public boolean verbose = false; + + @Option(name = "-quiet", forbids = {"-verbose"}, + usage = "Turns off all logging.") + public boolean quiet = false; + + // optional arguments + + @Option(name = "-index", metaVar = "[path]", usage = "Index path.") + public String index; + + @Option(name = "-fields", handler = StringArrayOptionHandler.class, + usage = "List of fields to index (space separated), in addition to the default 'contents' field.") + public String[] fields = new String[]{}; + + @Option(name = "-storePositions", + usage = "Boolean switch to index store term positions; needed for phrase queries.") + public boolean storePositions = false; + + @Option(name = "-storeDocvectors", + usage = "Boolean switch to store document vectors; needed for (pseudo) relevance feedback.") + public boolean storeDocvectors = false; + + @Option(name = "-storeContents", + usage = "Boolean switch to store document contents.") + public boolean storeContents = false; + + @Option(name = "-storeRaw", + usage = "Boolean switch to store raw source documents.") + public boolean storeRaw = false; + + @Option(name = "-optimize", + usage = "Boolean switch to optimize index (i.e., force merge) into a single segment; costly for large collections.") + public boolean optimize = false; + + @Option(name = "-uniqueDocid", + usage = "Removes duplicate documents with the same docid during indexing. 
This significantly slows indexing throughput " + + "but may be needed for tweet collections since the streaming API might deliver a tweet multiple times.") + public boolean uniqueDocid = false; + + @Option(name = "-memorybuffer", metaVar = "[mb]", + usage = "Memory buffer size (in MB).") + public int memorybufferSize = 2048; + + @Option(name = "-whitelist", metaVar = "[file]", + usage = "File containing list of docids, one per line; only these docids will be indexed.") + public String whitelist = null; + + + // Sharding options + + @Option(name = "-shard.count", metaVar = "[n]", + usage = "Number of shards to partition the document collection into.") + public int shardCount = -1; + + @Option(name = "-shard.current", metaVar = "[n]", + usage = "The current shard number to generate (indexed from 0).") + public int shardCurrent = -1; +} diff --git a/src/main/java/io/anserini/index/IndexVectorCollection.java b/src/main/java/io/anserini/index/IndexVectorCollection.java new file mode 100644 index 0000000000..30c8bc4862 --- /dev/null +++ b/src/main/java/io/anserini/index/IndexVectorCollection.java @@ -0,0 +1,382 @@ +/* + * Anserini: A Lucene toolkit for reproducible information retrieval research + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.anserini.index; + +import io.anserini.collection.DocumentCollection; +import io.anserini.collection.FileSegment; +import io.anserini.collection.SourceDocument; +import io.anserini.index.generator.EmptyDocumentException; +import io.anserini.index.generator.InvalidDocumentException; +import io.anserini.index.generator.LuceneDocumentGenerator; +import io.anserini.index.generator.SkippedDocumentException; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.time.DurationFormatUtils; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.core.config.Configurator; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.ConcurrentMergeScheduler; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.kohsuke.args4j.CmdLineException; +import org.kohsuke.args4j.CmdLineParser; +import org.kohsuke.args4j.OptionHandlerFilter; +import org.kohsuke.args4j.ParserProperties; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Executors; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; + +public final class IndexVectorCollection { + private static final Logger LOG = LogManager.getLogger(IndexVectorCollection.class); + + // This is the default analyzer used, 
unless another stemming algorithm or language is specified. + public final class Counters { + /** + * Counter for successfully indexed documents. + */ + public AtomicLong indexed = new AtomicLong(); + + /** + * Counter for empty documents that are not indexed. Empty documents are not necessary errors; + * it could be the case, for example, that a document is comprised solely of stopwords. + */ + public AtomicLong empty = new AtomicLong(); + + /** + * Counter for unindexable documents. These are cases where {@link SourceDocument#indexable()} + * returns false. + */ + public AtomicLong unindexable = new AtomicLong(); + + /** + * Counter for skipped documents. These are cases documents are skipped as part of normal + * processing logic, e.g., using a whitelist, not indexing retweets or deleted tweets. + */ + public AtomicLong skipped = new AtomicLong(); + + /** + * Counter for unexpected errors. + */ + public AtomicLong errors = new AtomicLong(); + } + + private final class LocalIndexerThread extends Thread { + final private Path inputFile; + final private IndexWriter writer; + final private DocumentCollection collection; + private FileSegment fileSegment; + + private LocalIndexerThread(IndexWriter writer, DocumentCollection collection, Path inputFile) { + this.writer = writer; + this.collection = collection; + this.inputFile = inputFile; + setName(inputFile.getFileName().toString()); + } + + @Override + @SuppressWarnings("unchecked") + public void run() { + try { + LuceneDocumentGenerator generator = (LuceneDocumentGenerator) + generatorClass.getDeclaredConstructor(IndexVectorArgs.class).newInstance(args); + + // We keep track of two separate counts: the total count of documents in this file segment (cnt), + // and the number of documents in this current "batch" (batch). We update the global counter every + // 10k documents: this is so that we get intermediate updates, which is informative if a collection + // has only one file segment; see https://github.com/castorini/anserini/issues/683 + int cnt = 0; + int batch = 0; + + FileSegment segment = collection.createFileSegment(inputFile); + // in order to call close() and clean up resources in case of exception + this.fileSegment = segment; + + for (SourceDocument d : segment) { + if (!d.indexable()) { + counters.unindexable.incrementAndGet(); + continue; + } + + Document doc; + try { + doc = generator.createDocument(d); + } catch (EmptyDocumentException e1) { + counters.empty.incrementAndGet(); + continue; + } catch (SkippedDocumentException e2) { + counters.skipped.incrementAndGet(); + continue; + } catch (InvalidDocumentException e3) { + counters.errors.incrementAndGet(); + continue; + } + + if (whitelistDocids != null && !whitelistDocids.contains(d.id())) { + counters.skipped.incrementAndGet(); + continue; + } + + if (args.uniqueDocid) { + writer.updateDocument(new Term("id", d.id()), doc); + } else { + writer.addDocument(doc); + } + cnt++; + batch++; + + // And the counts from this batch, reset batch counter. + if (batch % 10000 == 0) { + counters.indexed.addAndGet(batch); + batch = 0; + } + } + + // Add the remaining documents. + counters.indexed.addAndGet(batch); + + int skipped = segment.getSkippedCount(); + if (skipped > 0) { + // When indexing tweets, this is normal, because there are delete messages that are skipped over. 
+ counters.skipped.addAndGet(skipped); + LOG.warn(inputFile.getParent().getFileName().toString() + File.separator + + inputFile.getFileName().toString() + ": " + skipped + " docs skipped."); + } + + if (segment.getErrorStatus()) { + counters.errors.incrementAndGet(); + LOG.error(inputFile.getParent().getFileName().toString() + File.separator + + inputFile.getFileName().toString() + ": error iterating through segment."); + } + + // Log at the debug level because this can be quite noisy if there are lots of file segments. + LOG.debug(inputFile.getParent().getFileName().toString() + File.separator + + inputFile.getFileName().toString() + ": " + cnt + " docs added."); + } catch (Exception e) { + LOG.error(Thread.currentThread().getName() + ": Unexpected Exception:", e); + } finally { + if (fileSegment != null) { + fileSegment.close(); + } + } + } + } + + private final IndexVectorArgs args; + private final Path collectionPath; + private final Set whitelistDocids; + private final Class collectionClass; + private final Class generatorClass; + private final DocumentCollection collection; + private final Counters counters; + private Path indexPath; + + @SuppressWarnings("unchecked") + public IndexVectorCollection(IndexVectorArgs args) throws Exception { + this.args = args; + + if (args.verbose) { + // If verbose logging enabled, changed default log level to DEBUG so we get per-thread logging messages. + Configurator.setRootLevel(Level.DEBUG); + LOG.info("Setting log level to " + Level.DEBUG); + } else if (args.quiet) { + // If quiet mode enabled, only report warnings and above. + Configurator.setRootLevel(Level.WARN); + } else { + // Otherwise, we get the standard set of log messages. + Configurator.setRootLevel(Level.INFO); + LOG.info("Setting log level to " + Level.INFO); + } + + LOG.info("Starting indexer..."); + LOG.info("============ Loading Parameters ============"); + LOG.info("DocumentCollection path: " + args.input); + LOG.info("CollectionClass: " + args.collectionClass); + LOG.info("Generator: " + args.generatorClass); + LOG.info("Threads: " + args.threads); + LOG.info("Store document \"contents\" field? " + args.storeContents); + LOG.info("Store document \"raw\" field? " + args.storeRaw); + LOG.info("Optimize (merge segments)? " + args.optimize); + LOG.info("Whitelist: " + args.whitelist); + LOG.info("Index path: " + args.index); + + if (args.index != null) { + this.indexPath = Paths.get(args.index); + if (!Files.exists(this.indexPath)) { + Files.createDirectories(this.indexPath); + } + } + + // Our documentation uses /path/to/foo as a convention: to make copy and paste of the commands work, we assume + // collections/ as the path location. + String pathStr = args.input; + if (pathStr.startsWith("/path/to")) { + pathStr = pathStr.replace("/path/to", "collections"); + } + collectionPath = Paths.get(pathStr); + if (!Files.exists(collectionPath) || !Files.isReadable(collectionPath) || !Files.isDirectory(collectionPath)) { + throw new RuntimeException("Document directory " + collectionPath.toString() + " does not exist or is not readable, please check the path"); + } + + this.generatorClass = Class.forName("io.anserini.index.generator." + args.generatorClass); + this.collectionClass = Class.forName("io.anserini.collection." + args.collectionClass); + + // Initialize the collection. 
+ collection = (DocumentCollection) this.collectionClass.getConstructor(Path.class).newInstance(collectionPath); + + if (args.whitelist != null) { + List lines = FileUtils.readLines(new File(args.whitelist), "utf-8"); + this.whitelistDocids = new HashSet<>(lines); + } else { + this.whitelistDocids = null; + } + + this.counters = new Counters(); + } + + public Counters run() throws IOException { + final long start = System.nanoTime(); + LOG.info("============ Indexing Collection ============"); + + int numThreads = args.threads; + IndexWriter writer = null; + + // Used for LocalIndexThread + if (indexPath != null) { + final Directory dir = FSDirectory.open(indexPath); + final IndexWriterConfig config = new IndexWriterConfig(); + config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); + config.setRAMBufferSizeMB(args.memorybufferSize); + config.setUseCompoundFile(false); + config.setMergeScheduler(new ConcurrentMergeScheduler()); + writer = new IndexWriter(dir, config); + } + + final ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads); + LOG.info("Thread pool with " + numThreads + " threads initialized."); + + LOG.info("Initializing collection in " + collectionPath.toString()); + + List segmentPaths = collection.getSegmentPaths(); + // when we want sharding to be done + if (args.shardCount > 1) { + segmentPaths = collection.getSegmentPaths(args.shardCount, args.shardCurrent); + } + final int segmentCnt = segmentPaths.size(); + + LOG.info(String.format("%,d %s found", segmentCnt, (segmentCnt == 1 ? "file" : "files" ))); + LOG.info("Starting to index..."); + + for (int i = 0; i < segmentCnt; i++) { + executor.execute(new LocalIndexerThread(writer, collection, (Path) segmentPaths.get(i))); + } + + executor.shutdown(); + + try { + // Wait for existing tasks to terminate + while (!executor.awaitTermination(1, TimeUnit.MINUTES)) { + if (segmentCnt == 1) { + LOG.info(String.format("%,d documents indexed", counters.indexed.get())); + } else { + LOG.info(String.format("%.2f%% of files completed, %,d documents indexed", + (double) executor.getCompletedTaskCount() / segmentCnt * 100.0d, counters.indexed.get())); + } + } + } catch (InterruptedException ie) { + // (Re-)Cancel if current thread also interrupted + executor.shutdownNow(); + // Preserve interrupt status + Thread.currentThread().interrupt(); + } + + if (segmentCnt != executor.getCompletedTaskCount()) { + throw new RuntimeException("totalFiles = " + segmentCnt + + " is not equal to completedTaskCount = " + executor.getCompletedTaskCount()); + } + + long numIndexed = writer.getDocStats().maxDoc; + + // Do a final commit + try { + if (writer != null) { + writer.commit(); + if (args.optimize) { + writer.forceMerge(1); + } + } + } finally { + try { + if (writer != null) { + writer.close(); + } + } catch (IOException e) { + // It is possible that this happens... but nothing much we can do at this point, + // so just log the error and move on. + LOG.error(e); + } + } + + if (numIndexed != counters.indexed.get()) { + LOG.warn("Unexpected difference between number of indexed documents and index maxDoc."); + } + + LOG.info(String.format("Indexing Complete! 
%,d documents indexed", numIndexed)); + LOG.info("============ Final Counter Values ============"); + LOG.info(String.format("indexed: %,12d", counters.indexed.get())); + LOG.info(String.format("unindexable: %,12d", counters.unindexable.get())); + LOG.info(String.format("empty: %,12d", counters.empty.get())); + LOG.info(String.format("skipped: %,12d", counters.skipped.get())); + LOG.info(String.format("errors: %,12d", counters.errors.get())); + + final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS); + LOG.info(String.format("Total %,d documents indexed in %s", numIndexed, + DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss"))); + + return counters; + } + + public static void main(String[] args) throws Exception { + IndexVectorArgs indexCollectionArgs = new IndexVectorArgs(); + CmdLineParser parser = new CmdLineParser(indexCollectionArgs, ParserProperties.defaults().withUsageWidth(100)); + + try { + parser.parseArgument(args); + } catch (CmdLineException e) { + System.err.println(e.getMessage()); + parser.printUsage(System.err); + System.err.println("Example: " + IndexVectorCollection.class.getSimpleName() + + parser.printExample(OptionHandlerFilter.REQUIRED)); + return; + } + + new IndexVectorCollection(indexCollectionArgs).run(); + } +} diff --git a/src/main/java/io/anserini/index/generator/LuceneVectorDocumentGenerator.java b/src/main/java/io/anserini/index/generator/LuceneVectorDocumentGenerator.java new file mode 100644 index 0000000000..73437c9d60 --- /dev/null +++ b/src/main/java/io/anserini/index/generator/LuceneVectorDocumentGenerator.java @@ -0,0 +1,93 @@ +/* + * Anserini: A Lucene toolkit for reproducible information retrieval research + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.anserini.index.generator; + +import io.anserini.collection.SourceDocument; +import io.anserini.index.IndexVectorArgs; + +import java.util.ArrayList; +import java.util.Arrays; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.KnnVectorField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.VectorSimilarityFunction; + + +/** + * Converts a {@link SourceDocument} into a Lucene {@link Document}, ready to be indexed. 
+ *
+ * @param <T> type of the source document
+ */
+public class LuceneVectorDocumentGenerator<T extends SourceDocument> implements LuceneDocumentGenerator<T> {
+  protected IndexVectorArgs args;
+
+  protected LuceneVectorDocumentGenerator() {
+  }
+
+  /**
+   * Constructor with config.
+   *
+   * @param args configuration arguments
+   */
+  public LuceneVectorDocumentGenerator(IndexVectorArgs args) {
+    this.args = args;
+  }
+
+  private float[] convertJsonArray(String vectorString) throws JsonMappingException, JsonProcessingException {
+    ObjectMapper mapper = new ObjectMapper();
+    ArrayList<Float> denseVector = mapper.readValue(vectorString, new TypeReference<ArrayList<Float>>(){});
+    int length = denseVector.size();
+    float[] vector = new float[length];
+    int i = 0;
+    for (Float f : denseVector) {
+      vector[i++] = f;
+    }
+    return vector;
+  }
+
+  @Override
+  public Document createDocument(T src) throws InvalidDocumentException {
+    String id = src.id();
+    float[] contents;
+
+    try {
+      contents = convertJsonArray(src.contents());
+    } catch (Exception e) {
+      throw new InvalidDocumentException();
+    }
+
+    // Make a new, empty document.
+    final Document document = new Document();
+
+    // Store the collection docid.
+    document.add(new StringField(IndexVectorArgs.ID, id, Field.Store.YES));
+    // Store the dense vector for kNN (HNSW) search.
+    document.add(new KnnVectorField(IndexVectorArgs.VECTOR, contents, VectorSimilarityFunction.DOT_PRODUCT));
+    if (args.storeRaw) {
+      document.add(new StoredField(IndexVectorArgs.RAW, src.raw()));
+    }
+    return document;
+  }
+}
diff --git a/src/main/java/io/anserini/search/SearchVectorArgs.java b/src/main/java/io/anserini/search/SearchVectorArgs.java
new file mode 100644
index 0000000000..04e0e92a8d
--- /dev/null
+++ b/src/main/java/io/anserini/search/SearchVectorArgs.java
@@ -0,0 +1,103 @@
+/*
+ * Anserini: A Lucene toolkit for reproducible information retrieval research
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package io.anserini.search; + +import org.kohsuke.args4j.Option; +import org.kohsuke.args4j.spi.StringArrayOptionHandler; + + +public class SearchVectorArgs { + // required arguments + @Option(name = "-index", metaVar = "[path]", required = true, usage = "Path to Lucene index") + public String index; + + @Option(name = "-topics", metaVar = "[file]", handler = StringArrayOptionHandler.class, required = true, usage = "topics file") + public String[] topics; + + @Option(name = "-output", metaVar = "[file]", required = true, usage = "output file") + public String output; + + @Option(name = "-topicreader", required = true, usage = "TopicReader to use.") + public String topicReader; + + // optional arguments + @Option(name = "-querygenerator", usage = "QueryGenerator to use.") + public String queryGenerator = "BagOfWordsQueryGenerator"; + + @Option(name = "-threads", metaVar = "[int]", usage = "Number of threads to use for running different parameter configurations.") + public int threads = 1; + + @Option(name = "-parallelism", metaVar = "[int]", usage = "Number of threads to use for each individual parameter configuration.") + public int parallelism = 8; + + @Option(name = "-removeQuery", usage = "Remove docids that have the query id when writing final run output.") + public Boolean removeQuery = false; + + // Note that this option is set to false by default because duplicate documents usually indicate some underlying + // indexing issues, and we don't want to just eat errors silently. + @Option(name = "-removedups", usage = "Remove duplicate docids when writing final run output.") + public Boolean removedups = false; + + @Option(name = "-skipexists", usage = "When enabled, will skip if the run file exists") + public Boolean skipexists = false; + + @Option(name = "-hits", metaVar = "[number]", required = false, usage = "max number of hits to return") + public int hits = 1000; + + @Option(name = "-efSearch", metaVar = "[number]", required = false, usage = "efSearch parameter for HNSW search") + public int efSearch = 100; + + @Option(name = "-inmem", usage = "Boolean switch to read index in memory") + public Boolean inmem = false; + + @Option(name = "-topicfield", usage = "Which field of the query should be used, default \"title\"." + + " For TREC ad hoc topics, description or narrative can be used.") + public String topicfield = "title"; + + @Option(name = "-runtag", metaVar = "[tag]", usage = "runtag") + public String runtag = null; + + @Option(name = "-format", metaVar = "[output format]", usage = "Output format, default \"trec\", alternative \"msmarco\".") + public String format = "trec"; + + // --------------------------------------------- + // Simple built-in support for passage retrieval + // --------------------------------------------- + + // A simple approach to passage retrieval is to pre-segment documents in the corpus into passages and index those + // passages. At retrieval time, we retain only the max scoring passage from each document; this is often called MaxP, + // from Dai and Callan (SIGIR 2019) in the context of BERT, although the general approach dates back to Callan + // (SIGIR 1994), Hearst and Plaunt (SIGIR 1993), and lots of other papers from the 1990s and even earlier. + // + // One common convention is to label the passages of a docid as "docid.00000", "docid.00001", "docid.00002", ... + // We use this convention in CORD-19. Alternatively, in document expansion for the MS MARCO document corpus, we use + // '#' as the delimiter. 
+ // + // The options below control various aspects of this behavior. + + @Option(name = "-selectMaxPassage", usage = "Select and retain only the max scoring segment from each document.") + public Boolean selectMaxPassage = false; + + @Option(name = "-selectMaxPassage.delimiter", metaVar = "[regexp]", + usage = "The delimiter (as a regular regression) for splitting the segment id from the doc id.") + public String selectMaxPassage_delimiter = "\\."; + + @Option(name = "-selectMaxPassage.hits", metaVar = "[int]", + usage = "Maximum number of hits to return per topic after segment id removal. " + + "Note that this is different from '-hits', which specifies the number of hits including the segment id. ") + public int selectMaxPassage_hits = Integer.MAX_VALUE; +} diff --git a/src/main/java/io/anserini/search/SearchVectorCollection.java b/src/main/java/io/anserini/search/SearchVectorCollection.java new file mode 100644 index 0000000000..b92d42a381 --- /dev/null +++ b/src/main/java/io/anserini/search/SearchVectorCollection.java @@ -0,0 +1,335 @@ +/* + * Anserini: A Lucene toolkit for reproducible information retrieval research + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.anserini.search; + +import io.anserini.index.IndexVectorArgs; +import io.anserini.rerank.ScoredDocuments; +import io.anserini.search.query.VectorQueryGenerator; +import io.anserini.search.topicreader.TopicReader; +import org.apache.commons.lang3.time.DurationFormatUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.KnnVectorQuery; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TotalHits; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.MMapDirectory; +import org.kohsuke.args4j.CmdLineException; +import org.kohsuke.args4j.CmdLineParser; +import org.kohsuke.args4j.OptionHandlerFilter; +import org.kohsuke.args4j.ParserProperties; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashSet; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.concurrent.CompletionException; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.Executors; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Main entry point for search. 
+ */ +public final class SearchVectorCollection implements Closeable { + // These are the default tie-breaking rules for documents that end up with the same score with respect to a query. + // For most collections, docids are strings, and we break ties by lexicographic sort order. For tweets, docids are + // longs, and we break ties by reverse numerical sort order (i.e., most recent tweet first). This means that searching + // tweets requires a slightly different code path, which is enabled by the -searchtweets option in SearchVectorArgs. + public static final Sort BREAK_SCORE_TIES_BY_DOCID = + new Sort(SortField.FIELD_SCORE, new SortField(IndexVectorArgs.ID, SortField.Type.STRING_VAL)); + + private static final Logger LOG = LogManager.getLogger(SearchVectorCollection.class); + + private final SearchVectorArgs args; + private final IndexReader reader; + + private final class SearcherThread extends Thread { + final private IndexReader reader; + final private IndexSearcher searcher; + final private SortedMap> topics; + final private String outputPath; + final private String runTag; + + private SearcherThread(IndexReader reader, SortedMap> topics, String outputPath, String runTag) { + this.reader = reader; + this.topics = topics; + this.runTag = runTag; + this.outputPath = outputPath; + this.searcher = new IndexSearcher(this.reader); + setName(outputPath); + } + + @Override + public void run() { + try { + // A short descriptor of the ranking setup. + final String desc = String.format("ranker: kNN"); + // ThreadPool for parallelizing the execution of individual queries: + ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(args.parallelism); + // Data structure for holding the per-query results, with the qid as the key and the results (the lines that + // will go into the final run file) as the value. + ConcurrentSkipListMap results = new ConcurrentSkipListMap<>(); + AtomicInteger cnt = new AtomicInteger(); + + final long start = System.nanoTime(); + for (Map.Entry> entry : topics.entrySet()) { + K qid = entry.getKey(); + + // This is the per-query execution, in parallel. + executor.execute(() -> { + // This is for holding the results. + StringBuilder out = new StringBuilder(); + String queryString = entry.getValue().get(args.topicfield); + ScoredDocuments docs; + try { + docs = search(this.searcher, queryString); + } catch (IOException e) { + throw new CompletionException(e); + } + + // For removing duplicate docids. + Set docids = new HashSet<>(); + + int rank = 1; + for (int i = 0; i < docs.documents.length; i++) { + String docid = docs.documents[i].get(IndexVectorArgs.ID); + + if (args.selectMaxPassage) { + docid = docid.split(args.selectMaxPassage_delimiter)[0]; + } + + if (docids.contains(docid)) + continue; + + // Remove docids that are identical to the query id if flag is set. + if (args.removeQuery && docid.equals(qid)) + continue; + + if ("msmarco".equals(args.format)) { + // MS MARCO output format: + out.append(String.format(Locale.US, "%s\t%s\t%d\n", qid, docid, rank)); + } else { + // Standard TREC format: + // + the first column is the topic number. + // + the second column is currently unused and should always be "Q0". + // + the third column is the official document identifier of the retrieved document. + // + the fourth column is the rank the document is retrieved. + // + the fifth column shows the score (integer or floating point) that generated the ranking. 
+ // + the sixth column is called the "run tag" and should be a unique identifier for your + out.append(String.format(Locale.US, "%s Q0 %s %d %f %s\n", + qid, docid, rank, docs.scores[i], runTag)); + } + + // Note that this option is set to false by default because duplicate documents usually indicate some + // underlying indexing issues, and we don't want to just eat errors silently. + // + // However, we we're performing passage retrieval, i.e., with "selectMaxSegment", we *do* want to remove + // duplicates. + if (args.removedups || args.selectMaxPassage) { + docids.add(docid); + } + + rank++; + + if (args.selectMaxPassage && rank > args.selectMaxPassage_hits) { + break; + } + } + + results.put(qid, out.toString()); + int n = cnt.incrementAndGet(); + if (n % 100 == 0) { + LOG.info(String.format("%s: %d queries processed", desc, n)); + } + }); + } + + executor.shutdown(); + + try { + // Wait for existing tasks to terminate. + while (!executor.awaitTermination(1, TimeUnit.MINUTES)); + } catch (InterruptedException ie) { + // (Re-)Cancel if current thread also interrupted. + executor.shutdownNow(); + // Preserve interrupt status. + Thread.currentThread().interrupt(); + } + final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS); + + LOG.info(desc + ": " + topics.size() + " queries processed in " + + DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss") + + String.format(" = ~%.2f q/s", topics.size()/(durationMillis/1000.0))); + + // Now we write the results to a run file. + PrintWriter out = new PrintWriter(Files.newBufferedWriter(Paths.get(outputPath), StandardCharsets.UTF_8)); + + // This is the default case: just dump out the qids by their natural order. + for (K qid : results.keySet()) { + out.print(results.get(qid)); + } + out.flush(); + out.close(); + + } catch (Exception e) { + LOG.error(Thread.currentThread().getName() + ": Unexpected Exception: ", e); + } + } + } + + public SearchVectorCollection(SearchVectorArgs args) throws IOException { + this.args = args; + Path indexPath = Paths.get(args.index); + + if (!Files.exists(indexPath) || !Files.isDirectory(indexPath) || !Files.isReadable(indexPath)) { + throw new IllegalArgumentException(String.format("Index path '%s' does not exist or is not a directory.", args.index)); + } + + LOG.info("============ Initializing Searcher ============"); + LOG.info("Index: " + indexPath); + this.reader = args.inmem ? DirectoryReader.open(MMapDirectory.open(indexPath)) : + DirectoryReader.open(FSDirectory.open(indexPath)); + LOG.info("Vector Search:"); + LOG.info("Number of threads for running different parameter configurations: " + args.threads); + } + + @Override + public void close() throws IOException { + reader.close(); + } + + @SuppressWarnings("unchecked") + public void runTopics() throws IOException { + TopicReader tr; + SortedMap> topics = new TreeMap<>(); + for (String singleTopicsFile : args.topics) { + Path topicsFilePath = Paths.get(singleTopicsFile); + if (!Files.exists(topicsFilePath) || !Files.isRegularFile(topicsFilePath) || !Files.isReadable(topicsFilePath)) { + throw new IllegalArgumentException("Topics file : " + topicsFilePath + " does not exist or is not a (readable) file."); + } + try { + tr = (TopicReader) Class.forName("io.anserini.search.topicreader." 
+ args.topicReader + "TopicReader") + .getConstructor(Path.class).newInstance(topicsFilePath); + topics.putAll(tr.read()); + } catch (Exception e) { + e.printStackTrace(); + throw new IllegalArgumentException("Unable to load topic reader: " + args.topicReader); + } + } + + final String runTag = args.runtag == null ? "Anserini" : args.runtag; + LOG.info("runtag: " + runTag); + + final ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(args.threads); + + + LOG.info("============ Launching Search Threads ============"); + + String outputPath = args.output; + if (args.skipexists && new File(outputPath).exists()) { + LOG.info("Run already exists, skipping: " + outputPath); + } else { + executor.execute(new SearcherThread<>(reader, topics, outputPath, runTag)); + executor.shutdown(); + } + + try { + // Wait for existing tasks to terminate + while (!executor.awaitTermination(1, TimeUnit.MINUTES)) { + } + } catch (InterruptedException ie) { + // (Re-)Cancel if current thread also interrupted + executor.shutdownNow(); + // Preserve interrupt status + Thread.currentThread().interrupt(); + } + } + + public ScoredDocuments search(IndexSearcher searcher, String queryString) throws IOException { + KnnVectorQuery query; + VectorQueryGenerator generator; + try { + generator = (VectorQueryGenerator) Class.forName("io.anserini.search.query." + args.queryGenerator) + .getConstructor().newInstance(); + } catch (Exception e) { + e.printStackTrace(); + throw new IllegalArgumentException("Unable to load QueryGenerator: " + args.topicReader); + } + + // If fieldsMap isn't null, then it means that the -fields option is specified. In this case, we search across + // multiple fields with the associated boosts. + query = generator.buildQuery(IndexVectorArgs.VECTOR, queryString, args.efSearch); + + TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[]{}); + rs = searcher.search(query, args.hits); + ScoredDocuments scoredDocs; + scoredDocs = ScoredDocuments.fromTopDocs(rs, searcher); + + return scoredDocs; + } + + + public static void main(String[] args) throws Exception { + SearchVectorArgs searchArgs = new SearchVectorArgs(); + CmdLineParser parser = new CmdLineParser(searchArgs, ParserProperties.defaults().withUsageWidth(100)); + + try { + parser.parseArgument(args); + } catch (CmdLineException e) { + System.err.println(e.getMessage()); + parser.printUsage(System.err); + System.err.println("Example: SearchCollection" + parser.printExample(OptionHandlerFilter.REQUIRED)); + return; + } + + final long start = System.nanoTime(); + SearchVectorCollection searcher; + + // We're at top-level already inside a main; makes no sense to propagate exceptions further, so reformat the + // exception messages and display on console. 
+ try { + searcher = new SearchVectorCollection(searchArgs); + } catch (IllegalArgumentException e) { + System.err.println(e.getMessage()); + return; + } + + searcher.runTopics(); + searcher.close(); + final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS); + LOG.info("Total run time: " + DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss")); + } +} diff --git a/src/main/java/io/anserini/search/query/VectorQueryGenerator.java b/src/main/java/io/anserini/search/query/VectorQueryGenerator.java new file mode 100644 index 0000000000..71ed76d019 --- /dev/null +++ b/src/main/java/io/anserini/search/query/VectorQueryGenerator.java @@ -0,0 +1,48 @@ +/* + * Anserini: A Lucene toolkit for reproducible information retrieval research + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.anserini.search.query; + +import java.util.ArrayList; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.lucene.search.KnnVectorQuery; + +public class VectorQueryGenerator { + + private float[] convertJsonArray(String vectorString) throws JsonMappingException, JsonProcessingException { + ObjectMapper mapper = new ObjectMapper(); + ArrayList denseVector = mapper.readValue(vectorString, new TypeReference>(){}); + int length = denseVector.size(); + float[] vector = new float[length]; + int i = 0; + for (Float f : denseVector) { + vector[i++] = f; + } + return vector; + } + + public KnnVectorQuery buildQuery(String field, String queryString, Integer topK) throws JsonMappingException, JsonProcessingException{ + float[] queryVector; + queryVector = convertJsonArray(queryString); + KnnVectorQuery knnQuery = new KnnVectorQuery(field, queryVector, topK); + return knnQuery; + } +} diff --git a/src/main/java/io/anserini/search/topicreader/JsonIntVectorTopicReader.java b/src/main/java/io/anserini/search/topicreader/JsonIntVectorTopicReader.java new file mode 100644 index 0000000000..830cfc1357 --- /dev/null +++ b/src/main/java/io/anserini/search/topicreader/JsonIntVectorTopicReader.java @@ -0,0 +1,51 @@ +/* + * Anserini: A Lucene toolkit for reproducible information retrieval research + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package io.anserini.search.topicreader;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+public class JsonIntVectorTopicReader extends TopicReader<Integer> {
+
+  public JsonIntVectorTopicReader(Path topicFile) {
+    super(topicFile);
+  }
+
+  @Override
+  public SortedMap<Integer, Map<String, String>> read(BufferedReader reader) throws IOException {
+    SortedMap<Integer, Map<String, String>> map = new TreeMap<>();
+    String line;
+    ObjectMapper mapper = new ObjectMapper();
+    while ((line = reader.readLine()) != null) {
+      line = line.trim();
+      JsonNode lineNode = mapper.readerFor(JsonNode.class).readTree(line);
+      Integer topicID = lineNode.get("qid").asInt();
+      Map<String, String> fields = new HashMap<>();
+      fields.put("vector", lineNode.get("vector").toString());
+      map.put(topicID, fields);
+    }
+    return map;
+  }
+}
diff --git a/src/main/java/io/anserini/search/topicreader/JsonStringVectorTopicReader.java b/src/main/java/io/anserini/search/topicreader/JsonStringVectorTopicReader.java
new file mode 100644
index 0000000000..b5b4695503
--- /dev/null
+++ b/src/main/java/io/anserini/search/topicreader/JsonStringVectorTopicReader.java
@@ -0,0 +1,52 @@
+/*
+ * Anserini: A Lucene toolkit for reproducible information retrieval research
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package io.anserini.search.topicreader;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+public class JsonStringVectorTopicReader extends TopicReader<String> {
+
+  public JsonStringVectorTopicReader(Path topicFile) {
+    super(topicFile);
+  }
+
+  @Override
+  public SortedMap<String, Map<String, String>> read(BufferedReader reader) throws IOException {
+    SortedMap<String, Map<String, String>> map = new TreeMap<>();
+    String line;
+    ObjectMapper mapper = new ObjectMapper();
+    while ((line = reader.readLine()) != null) {
+      line = line.trim();
+      JsonNode lineNode = mapper.readerFor(JsonNode.class).readTree(line);
+      String topicID = lineNode.get("qid").asText();
+      Map<String, String> fields = new HashMap<>();
+      fields.put("vector", lineNode.get("vector").toString());
+      map.put(topicID, fields);
+    }
+    return map;
+  }
+}
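Usage note (not part of the patch): the new pipeline expects pre-encoded vectors on both sides. VectorCollection reads JSONL documents of the form {"docid": "...", "vector": [0.01, -0.02, ...]}, and the two topic readers read JSONL queries of the form {"qid": ..., "vector": [...]}. The sketch below is one way the pieces introduced in this diff could be driven programmatically; the class name VectorSearchDemo and all paths are hypothetical placeholders. Note that -topicfield must be set to "vector" (the key under which the topic readers store the query), and the query generator should be VectorQueryGenerator rather than the BagOfWordsQueryGenerator default. The same flows are available from the command line through the IndexVectorCollection and SearchVectorCollection main() entry points registered in pom.xml.

import io.anserini.index.IndexVectorArgs;
import io.anserini.index.IndexVectorCollection;
import io.anserini.search.SearchVectorArgs;
import io.anserini.search.SearchVectorCollection;

public class VectorSearchDemo {
  public static void main(String[] argv) throws Exception {
    // Index a directory of JSONL files; each line holds {"docid": ..., "vector": [...]}.
    IndexVectorArgs indexArgs = new IndexVectorArgs();
    indexArgs.input = "collections/my-dense-corpus";             // hypothetical path
    indexArgs.collectionClass = "VectorCollection";
    indexArgs.generatorClass = "LuceneVectorDocumentGenerator";
    indexArgs.index = "indexes/my-dense-index";                  // hypothetical path
    indexArgs.threads = 8;
    new IndexVectorCollection(indexArgs).run();

    // Search with pre-encoded query vectors; each topic line holds {"qid": ..., "vector": [...]}.
    SearchVectorArgs searchArgs = new SearchVectorArgs();
    searchArgs.index = "indexes/my-dense-index";
    searchArgs.topics = new String[] {"topics/my-dense-topics.jsonl"};  // hypothetical path
    searchArgs.topicReader = "JsonStringVector";    // resolves to JsonStringVectorTopicReader
    searchArgs.topicfield = "vector";               // topic readers store the query under "vector"
    searchArgs.queryGenerator = "VectorQueryGenerator";
    searchArgs.output = "runs/run.my-dense-index.txt";                  // hypothetical path
    searchArgs.hits = 1000;
    searchArgs.efSearch = 100;
    try (SearchVectorCollection searcher = new SearchVectorCollection(searchArgs)) {
      searcher.runTopics();
    }
  }
}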