From b364cf54bfd95326c33f3592417253bec8e1aee8 Mon Sep 17 00:00:00 2001 From: Boaz Leskes Date: Fri, 9 Oct 2015 23:20:04 +0300 Subject: [PATCH 01/12] Introduce Primary Terms Every shard group in Elasticsearch has a selected copy called a primary. When a primary shard fails a new primary would be selected from the existing replica copies. This PR introduces `primary terms` to track the number of times this has happened. This will allow us, as follow up work and among other things, to identify operations that come from old stale primaries. It is also the first step in road towards sequence numbers. Relates to #10708 Closes #14062 --- .../elasticsearch/cluster/ClusterState.java | 31 ++-- .../cluster/metadata/IndexMetaData.java | 151 +++++++++++++++--- .../cluster/routing/IndexRoutingTable.java | 23 ++- .../routing/IndexShardRoutingTable.java | 28 ++-- .../cluster/routing/RoutingNodes.java | 2 + .../cluster/routing/RoutingTable.java | 10 +- .../cluster/routing/ShardRouting.java | 50 +++++- .../routing/allocation/AllocationService.java | 82 ++++++++-- .../routing/allocation/RoutingAllocation.java | 24 ++- .../ExceptionSerializationTests.java | 2 +- .../health/ClusterHealthResponsesTests.java | 9 +- .../TransportBroadcastByNodeActionTests.java | 30 ++-- .../BroadcastReplicationTests.java | 2 +- .../ClusterStateCreationUtils.java | 24 +-- .../cluster/ClusterStateDiffIT.java | 3 +- .../elasticsearch/cluster/DiskUsageTests.java | 28 ++-- .../metadata/ToAndFromJsonMetaDataTests.java | 46 +++--- .../cluster/routing/AllocationIdTests.java | 10 +- .../cluster/routing/RoutingTableTests.java | 127 +++++++++++++-- .../cluster/routing/ShardRoutingHelper.java | 2 +- .../cluster/routing/ShardRoutingTests.java | 81 +++++++--- .../cluster/routing/TestShardRouting.java | 27 ++-- .../cluster/routing/UnassignedInfoTests.java | 2 +- .../allocation/CatAllocationTestCase.java | 2 +- .../PrimaryElectionRoutingTests.java | 34 ++-- .../routing/allocation/ShardStateIT.java | 70 ++++++++ 
.../allocation/StartedShardsRoutingTests.java | 88 ++++++---- .../decider/DiskThresholdDeciderTests.java | 48 +++--- .../DiskThresholdDeciderUnitTests.java | 32 ++-- .../zen/NodeJoinControllerTests.java | 6 +- .../gateway/PrimaryShardAllocatorTests.java | 54 ++++--- .../gateway/PriorityComparatorTests.java | 21 +-- .../gateway/RecoveryFromGatewayIT.java | 73 ++++++++- .../gateway/ReplicaShardAllocatorTests.java | 8 +- .../index/shard/IndexShardTests.java | 10 +- .../indices/flush/SyncedFlushUnitTests.java | 2 +- .../store/IndicesStoreIntegrationIT.java | 2 +- .../indices/store/IndicesStoreTests.java | 27 ++-- 38 files changed, 895 insertions(+), 376 deletions(-) create mode 100644 core/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardStateIT.java diff --git a/core/src/main/java/org/elasticsearch/cluster/ClusterState.java b/core/src/main/java/org/elasticsearch/cluster/ClusterState.java index 5b84ec4367a47..b50df2268ecee 100644 --- a/core/src/main/java/org/elasticsearch/cluster/ClusterState.java +++ b/core/src/main/java/org/elasticsearch/cluster/ClusterState.java @@ -21,7 +21,6 @@ import com.carrotsearch.hppc.cursors.ObjectCursor; import com.carrotsearch.hppc.cursors.ObjectObjectCursor; - import org.elasticsearch.cluster.DiffableUtils.KeyedReader; import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.block.ClusterBlocks; @@ -31,12 +30,7 @@ import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; -import org.elasticsearch.cluster.routing.IndexRoutingTable; -import org.elasticsearch.cluster.routing.IndexShardRoutingTable; -import org.elasticsearch.cluster.routing.RoutingNode; -import org.elasticsearch.cluster.routing.RoutingNodes; -import org.elasticsearch.cluster.routing.RoutingTable; -import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.*; import 
org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.cluster.service.InternalClusterService; import org.elasticsearch.common.Nullable; @@ -57,11 +51,7 @@ import org.elasticsearch.discovery.zen.publish.PublishClusterStateAction; import java.io.IOException; -import java.util.EnumSet; -import java.util.HashMap; -import java.util.Locale; -import java.util.Map; -import java.util.Set; +import java.util.*; /** * Represents the current state of the cluster. @@ -137,7 +127,7 @@ public static T lookupPrototype(String type) { public static T lookupPrototypeSafe(String type) { @SuppressWarnings("unchecked") - T proto = (T)customPrototypes.get(type); + T proto = (T) customPrototypes.get(type); if (proto == null) { throw new IllegalArgumentException("No custom state prototype registered for type [" + type + "]"); } @@ -478,6 +468,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } builder.endArray(); + builder.startObject("primary_terms"); + for (int shard = 0; shard < indexMetaData.getNumberOfShards(); shard++) { + builder.field(Integer.toString(shard), indexMetaData.primaryTerm(shard)); + } + builder.endObject(); + builder.endObject(); } builder.endObject(); @@ -593,6 +589,7 @@ public Builder nodes(DiscoveryNodes nodes) { public Builder routingResult(RoutingAllocation.Result routingResult) { this.routingTable = routingResult.routingTable(); + this.metaData = routingResult.metaData(); return this; } @@ -673,16 +670,16 @@ public static byte[] toBytes(ClusterState state) throws IOException { } /** - * @param data input bytes - * @param localNode used to set the local node in the cluster state. + * @param data input bytes + * @param localNode used to set the local node in the cluster state. 
*/ public static ClusterState fromBytes(byte[] data, DiscoveryNode localNode) throws IOException { return readFrom(StreamInput.wrap(data), localNode); } /** - * @param in input stream - * @param localNode used to set the local node in the cluster state. can be null. + * @param in input stream + * @param localNode used to set the local node in the cluster state. can be null. */ public static ClusterState readFrom(StreamInput in, @Nullable DiscoveryNode localNode) throws IOException { return PROTO.readFrom(in, localNode); diff --git a/core/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java b/core/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java index 42e9a4b2244b5..5c03ba8b163bd 100644 --- a/core/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java +++ b/core/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java @@ -19,6 +19,7 @@ package org.elasticsearch.cluster.metadata; +import com.carrotsearch.hppc.LongArrayList; import com.carrotsearch.hppc.cursors.ObjectCursor; import com.carrotsearch.hppc.cursors.ObjectObjectCursor; import org.elasticsearch.Version; @@ -28,6 +29,7 @@ import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.node.DiscoveryNodeFilters; +import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.collect.ImmutableOpenMap; @@ -46,10 +48,7 @@ import java.io.IOException; import java.text.ParseException; -import java.util.EnumSet; -import java.util.HashMap; -import java.util.Locale; -import java.util.Map; +import java.util.*; import static org.elasticsearch.cluster.node.DiscoveryNodeFilters.OpType.AND; import static org.elasticsearch.cluster.node.DiscoveryNodeFilters.OpType.OR; @@ -58,7 +57,7 @@ /** * */ -public class IndexMetaData implements Diffable, FromXContentBuilder, ToXContent 
{ +public class IndexMetaData implements Diffable, FromXContentBuilder, ToXContent { public static final IndexMetaData PROTO = IndexMetaData.builder("") .settings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)) @@ -145,6 +144,7 @@ public static State fromString(String state) { throw new IllegalStateException("No state match for [" + state + "]"); } } + public static final String INDEX_SETTING_PREFIX = "index."; public static final String SETTING_NUMBER_OF_SHARDS = "index.number_of_shards"; public static final String SETTING_NUMBER_OF_REPLICAS = "index.number_of_replicas"; @@ -173,6 +173,7 @@ public static State fromString(String state) { private final String index; private final long version; + private final long[] primaryTerms; private final State state; @@ -194,7 +195,7 @@ public static State fromString(String state) { private final Version indexUpgradedVersion; private final org.apache.lucene.util.Version minimumCompatibleLuceneVersion; - private IndexMetaData(String index, long version, State state, Settings settings, ImmutableOpenMap mappings, ImmutableOpenMap aliases, ImmutableOpenMap customs) { + private IndexMetaData(String index, long version, long[] primaryTerms, State state, Settings settings, ImmutableOpenMap mappings, ImmutableOpenMap aliases, ImmutableOpenMap customs) { Integer maybeNumberOfShards = settings.getAsInt(SETTING_NUMBER_OF_SHARDS, null); if (maybeNumberOfShards == null) { throw new IllegalArgumentException("must specify numberOfShards for index [" + index + "]"); @@ -212,10 +213,12 @@ private IndexMetaData(String index, long version, State state, Settings settings if (numberOfReplicas < 0) { throw new IllegalArgumentException("must specify non-negative number of shards for index [" + index + "]"); } + this.settings = settings; this.index = index; this.version = version; + this.primaryTerms = primaryTerms; + assert primaryTerms.length == numberOfShards; this.state = state; - this.settings = settings; 
this.mappings = mappings; this.customs = customs; this.numberOfShards = numberOfShards; @@ -248,7 +251,7 @@ private IndexMetaData(String index, long version, State state, Settings settings try { this.minimumCompatibleLuceneVersion = org.apache.lucene.util.Version.parse(stringLuceneVersion); } catch (ParseException ex) { - throw new IllegalStateException("Cannot parse lucene version [" + stringLuceneVersion + "] in the [" + SETTING_VERSION_MINIMUM_COMPATIBLE +"] setting", ex); + throw new IllegalStateException("Cannot parse lucene version [" + stringLuceneVersion + "] in the [" + SETTING_VERSION_MINIMUM_COMPATIBLE + "] setting", ex); } } else { this.minimumCompatibleLuceneVersion = null; @@ -279,6 +282,16 @@ public long getVersion() { return this.version; } + + /** + * The term of the current selected primary. This is a non-negative number incremented when + * a primary shard is assigned after a full cluster restart (see {@link ShardRouting#initialize(java.lang.String, long)} + * or a replica shard is promoted to a primary (see {@link ShardRouting#moveToPrimary()}). + **/ + public long primaryTerm(int shardId) { + return this.primaryTerms[shardId]; + } + /** * Return the {@link Version} on which this index has been created. This * information is typically useful for backward compatibility. 
@@ -390,6 +403,10 @@ public boolean equals(Object o) { IndexMetaData that = (IndexMetaData) o; + if (version != that.version) { + return false; + } + if (!aliases.equals(that.aliases)) { return false; } @@ -408,19 +425,27 @@ public boolean equals(Object o) { if (!customs.equals(that.customs)) { return false; } + + if (Arrays.equals(primaryTerms, that.primaryTerms) == false) { + return false; + } return true; } @Override public int hashCode() { int result = index.hashCode(); + result = 31 * result + Long.hashCode(version); result = 31 * result + state.hashCode(); result = 31 * result + aliases.hashCode(); result = 31 * result + settings.hashCode(); result = 31 * result + mappings.hashCode(); + result = 31 * result + customs.hashCode(); + result = 31 * result + Arrays.hashCode(primaryTerms); return result; } + @Override public Diff diff(IndexMetaData previousState) { return new IndexMetaDataDiff(previousState, this); @@ -446,6 +471,7 @@ private static class IndexMetaDataDiff implements Diff { private final String index; private final long version; + private final long[] primaryTerms; private final State state; private final Settings settings; private final Diff> mappings; @@ -457,6 +483,7 @@ public IndexMetaDataDiff(IndexMetaData before, IndexMetaData after) { version = after.version; state = after.state; settings = after.settings; + primaryTerms = after.primaryTerms; mappings = DiffableUtils.diff(before.mappings, after.mappings); aliases = DiffableUtils.diff(before.aliases, after.aliases); customs = DiffableUtils.diff(before.customs, after.customs); @@ -467,6 +494,7 @@ public IndexMetaDataDiff(StreamInput in) throws IOException { version = in.readLong(); state = State.fromId(in.readByte()); settings = Settings.readSettingsFromStream(in); + primaryTerms = in.readVLongArray(); mappings = DiffableUtils.readImmutableOpenMapDiff(in, MappingMetaData.PROTO); aliases = DiffableUtils.readImmutableOpenMapDiff(in, AliasMetaData.PROTO); customs = 
DiffableUtils.readImmutableOpenMapDiff(in, new DiffableUtils.KeyedReader() { @@ -488,6 +516,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(version); out.writeByte(state.id); Settings.writeSettingsToStream(settings, out); + out.writeVLongArray(primaryTerms); mappings.writeTo(out); aliases.writeTo(out); customs.writeTo(out); @@ -499,6 +528,7 @@ public IndexMetaData apply(IndexMetaData part) { builder.version(version); builder.state(state); builder.settings(settings); + builder.primaryTerms(primaryTerms); builder.mappings.putAll(mappings.apply(part.mappings)); builder.aliases.putAll(aliases.apply(part.aliases)); builder.customs.putAll(customs.apply(part.customs)); @@ -512,6 +542,7 @@ public IndexMetaData readFrom(StreamInput in) throws IOException { builder.version(in.readLong()); builder.state(State.fromId(in.readByte())); builder.settings(readSettingsFromStream(in)); + builder.primaryTerms(in.readVLongArray()); int mappingsSize = in.readVInt(); for (int i = 0; i < mappingsSize; i++) { MappingMetaData mappingMd = MappingMetaData.PROTO.readFrom(in); @@ -537,6 +568,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(version); out.writeByte(state.id()); writeSettingsToStream(settings, out); + out.writeVLongArray(primaryTerms); out.writeVInt(mappings.size()); for (ObjectCursor cursor : mappings.values()) { cursor.value.writeTo(out); @@ -565,6 +597,7 @@ public static class Builder { private String index; private State state = State.OPEN; private long version = 1; + private long[] primaryTerms = null; private Settings settings = Settings.Builder.EMPTY_SETTINGS; private final ImmutableOpenMap.Builder mappings; private final ImmutableOpenMap.Builder aliases; @@ -582,6 +615,7 @@ public Builder(IndexMetaData indexMetaData) { this.state = indexMetaData.state; this.version = indexMetaData.version; this.settings = indexMetaData.getSettings(); + this.primaryTerms = indexMetaData.primaryTerms.clone(); this.mappings = 
ImmutableOpenMap.builder(indexMetaData.mappings); this.aliases = ImmutableOpenMap.builder(indexMetaData.aliases); this.customs = ImmutableOpenMap.builder(indexMetaData.customs); @@ -613,7 +647,7 @@ public Builder numberOfReplicas(int numberOfReplicas) { public int numberOfReplicas() { return settings.getAsInt(SETTING_NUMBER_OF_REPLICAS, -1); } - + public Builder creationDate(long creationDate) { settings = settingsBuilder().put(settings).put(SETTING_CREATION_DATE, creationDate).build(); return this; @@ -624,8 +658,7 @@ public long creationDate() { } public Builder settings(Settings.Builder settings) { - this.settings = settings.build(); - return this; + return settings(settings.build()); } public Builder settings(Settings settings) { @@ -702,6 +735,42 @@ public Builder version(long version) { return this; } + /** + * returns the primary term for the given shard. + * See {@link IndexMetaData#primaryTerm(int)} for more information. + */ + public long primaryTerm(int shardId) { + if (primaryTerms == null) { + initializePrimaryTerms(); + } + return this.primaryTerms[shardId]; + } + + /** + * sets the primary term for the given shard. + * See {@link IndexMetaData#primaryTerm(int)} for more information. 
+ */ + public Builder primaryTerm(int shardId, long primaryTerm) { + if (primaryTerms == null) { + initializePrimaryTerms(); + } + this.primaryTerms[shardId] = primaryTerm; + return this; + } + + private void primaryTerms(long[] primaryTerms) { + this.primaryTerms = primaryTerms.clone(); + } + + private void initializePrimaryTerms() { + assert primaryTerms == null; + if (numberOfShards() < 0) { + throw new IllegalStateException("you must set the number of shards before setting/reading primary terms"); + } + primaryTerms = new long[numberOfShards()]; + } + + public IndexMetaData build() { ImmutableOpenMap.Builder tmpAliases = aliases; Settings tmpSettings = settings; @@ -714,24 +783,40 @@ public IndexMetaData build() { } } - return new IndexMetaData(index, version, state, tmpSettings, mappings.build(), tmpAliases.build(), customs.build()); + if (primaryTerms == null) { + initializePrimaryTerms(); + } else if (primaryTerms.length != numberOfShards()) { + throw new IllegalStateException("primaryTerms length is [" + primaryTerms.length + + "] but should be equal to number of shards [" + numberOfShards() + "]"); + } + + return new IndexMetaData(index, version, primaryTerms, state, tmpSettings, mappings.build(), tmpAliases.build(), customs.build()); + } + + static final class Fields { + static final XContentBuilderString VERSION = new XContentBuilderString("version"); + static final XContentBuilderString SETTINGS = new XContentBuilderString("settings"); + static final XContentBuilderString STATE = new XContentBuilderString("state"); + static final XContentBuilderString MAPPINGS = new XContentBuilderString("mappings"); + static final XContentBuilderString ALIASES = new XContentBuilderString("aliases"); + static final XContentBuilderString PRIMARY_TERMS = new XContentBuilderString("primary_terms"); } public static void toXContent(IndexMetaData indexMetaData, XContentBuilder builder, ToXContent.Params params) throws IOException { 
builder.startObject(indexMetaData.getIndex(), XContentBuilder.FieldCaseConversion.NONE); - builder.field("version", indexMetaData.getVersion()); - builder.field("state", indexMetaData.getState().toString().toLowerCase(Locale.ENGLISH)); + builder.field(Fields.VERSION, indexMetaData.getVersion()); + builder.field(Fields.STATE, indexMetaData.getState().toString().toLowerCase(Locale.ENGLISH)); boolean binary = params.paramAsBoolean("binary", false); - builder.startObject("settings"); + builder.startObject(Fields.SETTINGS); for (Map.Entry entry : indexMetaData.getSettings().getAsMap().entrySet()) { builder.field(entry.getKey(), entry.getValue()); } builder.endObject(); - builder.startArray("mappings"); + builder.startArray(Fields.MAPPINGS); for (ObjectObjectCursor cursor : indexMetaData.getMappings()) { if (binary) { builder.value(cursor.value.source().compressed()); @@ -751,16 +836,26 @@ public static void toXContent(IndexMetaData indexMetaData, XContentBuilder build builder.endObject(); } - builder.startObject("aliases"); + builder.startObject(Fields.ALIASES); for (ObjectCursor cursor : indexMetaData.getAliases().values()) { AliasMetaData.Builder.toXContent(cursor.value, builder, params); } builder.endObject(); + builder.startArray(Fields.PRIMARY_TERMS); + for (int i = 0; i < indexMetaData.getNumberOfShards(); i++) { + builder.value(indexMetaData.primaryTerm(i)); + } + builder.endArray(); builder.endObject(); } + // TODO move it somewhere where it will be useful for other code? + private static boolean fieldEquals(XContentBuilderString field, String currentFieldName) { + return field.underscore().getValue().equals(currentFieldName); + } + public static IndexMetaData fromXContent(XContentParser parser) throws IOException { if (parser.currentToken() == null) { // fresh parser? 
move to the first token parser.nextToken(); @@ -776,9 +871,9 @@ public static IndexMetaData fromXContent(XContentParser parser) throws IOExcepti if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); } else if (token == XContentParser.Token.START_OBJECT) { - if ("settings".equals(currentFieldName)) { + if (fieldEquals(Fields.SETTINGS, currentFieldName)) { builder.settings(Settings.settingsBuilder().put(SettingsLoader.Helper.loadNestedFromMap(parser.mapOrdered()))); - } else if ("mappings".equals(currentFieldName)) { + } else if (fieldEquals(Fields.MAPPINGS, currentFieldName)) { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); @@ -788,7 +883,7 @@ public static IndexMetaData fromXContent(XContentParser parser) throws IOExcepti builder.putMapping(new MappingMetaData(mappingType, mappingSource)); } } - } else if ("aliases".equals(currentFieldName)) { + } else if (fieldEquals(Fields.ALIASES, currentFieldName)) { while (parser.nextToken() != XContentParser.Token.END_OBJECT) { builder.putAlias(AliasMetaData.Builder.fromXContent(parser)); } @@ -804,7 +899,7 @@ public static IndexMetaData fromXContent(XContentParser parser) throws IOExcepti } } } else if (token == XContentParser.Token.START_ARRAY) { - if ("mappings".equals(currentFieldName)) { + if (fieldEquals(Fields.MAPPINGS, currentFieldName)) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { if (token == XContentParser.Token.VALUE_EMBEDDED_OBJECT) { builder.putMapping(new MappingMetaData(new CompressedXContent(parser.binaryValue()))); @@ -816,11 +911,21 @@ public static IndexMetaData fromXContent(XContentParser parser) throws IOExcepti } } } + } else if (fieldEquals(Fields.PRIMARY_TERMS, currentFieldName)) { + LongArrayList list = new LongArrayList(); + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + if (token == 
XContentParser.Token.VALUE_NUMBER) { + list.add(parser.longValue()); + } else { + throw new IllegalStateException("found a non-numeric value under [" + Fields.PRIMARY_TERMS.underscore() + "]"); + } + } + builder.primaryTerms(list.toArray()); } } else if (token.isValue()) { - if ("state".equals(currentFieldName)) { + if (fieldEquals(Fields.STATE, currentFieldName)) { builder.state(State.fromString(parser.text())); - } else if ("version".equals(currentFieldName)) { + } else if (fieldEquals(Fields.VERSION, currentFieldName)) { builder.version(parser.longValue()); } } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/IndexRoutingTable.java b/core/src/main/java/org/elasticsearch/cluster/routing/IndexRoutingTable.java index ca071c811e3e1..62054670e214f 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/IndexRoutingTable.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/IndexRoutingTable.java @@ -99,14 +99,17 @@ public String getIndex() { } /** - * creates a new {@link IndexRoutingTable} with all shard versions normalized + * creates a new {@link IndexRoutingTable} with all shard versions & primary terms set to the highest found. + * This allows incrementing {@link ShardRouting#version()} and {@link ShardRouting#primaryTerm()} where we work on + * the individual shards without worrying about synchronization between {@link ShardRouting} instances. This method + * takes care of it. 
* * @return new {@link IndexRoutingTable} */ - public IndexRoutingTable normalizeVersions() { + public IndexRoutingTable normalizeVersionsAndPrimaryTerms() { IndexRoutingTable.Builder builder = new Builder(this.index); for (IntObjectCursor cursor : shards) { - builder.addIndexShard(cursor.value.normalizeVersions()); + builder.addIndexShard(cursor.value.normalizeVersionsAndPrimaryTerms()); } return builder.build(); } @@ -422,11 +425,12 @@ private Builder initializeAsRestore(IndexMetaData indexMetaData, RestoreSource r for (int shardId = 0; shardId < indexMetaData.getNumberOfShards(); shardId++) { IndexShardRoutingTable.Builder indexShardRoutingBuilder = new IndexShardRoutingTable.Builder(new ShardId(indexMetaData.getIndex(), shardId)); for (int i = 0; i <= indexMetaData.getNumberOfReplicas(); i++) { + final long primaryTerm = indexMetaData.primaryTerm(shardId); if (asNew && ignoreShards.contains(shardId)) { // This shards wasn't completely snapshotted - restore it as new shard - indexShardRoutingBuilder.addShard(ShardRouting.newUnassigned(index, shardId, null, i == 0, unassignedInfo)); + indexShardRoutingBuilder.addShard(ShardRouting.newUnassigned(index, shardId, null, primaryTerm, i == 0, unassignedInfo)); } else { - indexShardRoutingBuilder.addShard(ShardRouting.newUnassigned(index, shardId, i == 0 ? restoreSource : null, i == 0, unassignedInfo)); + indexShardRoutingBuilder.addShard(ShardRouting.newUnassigned(index, shardId, i == 0 ? 
restoreSource : null, primaryTerm, i == 0, unassignedInfo)); } } shards.put(shardId, indexShardRoutingBuilder.build()); @@ -442,9 +446,10 @@ private Builder initializeEmpty(IndexMetaData indexMetaData, UnassignedInfo unas throw new IllegalStateException("trying to initialize an index with fresh shards, but already has shards created"); } for (int shardId = 0; shardId < indexMetaData.getNumberOfShards(); shardId++) { + final long primaryTerm = indexMetaData.primaryTerm(shardId); IndexShardRoutingTable.Builder indexShardRoutingBuilder = new IndexShardRoutingTable.Builder(new ShardId(indexMetaData.getIndex(), shardId)); for (int i = 0; i <= indexMetaData.getNumberOfReplicas(); i++) { - indexShardRoutingBuilder.addShard(ShardRouting.newUnassigned(index, shardId, null, i == 0, unassignedInfo)); + indexShardRoutingBuilder.addShard(ShardRouting.newUnassigned(index, shardId, null,primaryTerm, i == 0, unassignedInfo)); } shards.put(shardId, indexShardRoutingBuilder.build()); } @@ -455,9 +460,11 @@ public Builder addReplica() { for (IntCursor cursor : shards.keys()) { int shardId = cursor.value; // version 0, will get updated when reroute will happen - ShardRouting shard = ShardRouting.newUnassigned(index, shardId, null, false, new UnassignedInfo(UnassignedInfo.Reason.REPLICA_ADDED, null)); + final IndexShardRoutingTable shardRoutingTable = shards.get(shardId); + ShardRouting shard = ShardRouting.newUnassigned(index, shardId, null, shardRoutingTable.primary.primaryTerm(), false, + new UnassignedInfo(UnassignedInfo.Reason.REPLICA_ADDED, null)); shards.put(shardId, - new IndexShardRoutingTable.Builder(shards.get(shard.id())).addShard(shard).build() + new IndexShardRoutingTable.Builder(shardRoutingTable).addShard(shard).build() ); } return this; diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/IndexShardRoutingTable.java b/core/src/main/java/org/elasticsearch/cluster/routing/IndexShardRoutingTable.java index 6512ee5cef799..fa3da3779ac46 100644 --- 
a/core/src/main/java/org/elasticsearch/cluster/routing/IndexShardRoutingTable.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/IndexShardRoutingTable.java @@ -28,14 +28,7 @@ import org.elasticsearch.index.shard.ShardId; import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.concurrent.ThreadLocalRandom; import static java.util.Collections.emptyMap; @@ -120,34 +113,37 @@ public class IndexShardRoutingTable implements Iterable { } /** - * Normalizes all shard routings to the same version. + * Normalizes all shard routings to the same (highest found) version & primary terms. */ - public IndexShardRoutingTable normalizeVersions() { + public IndexShardRoutingTable normalizeVersionsAndPrimaryTerms() { if (shards.isEmpty()) { return this; } + if (shards.size() == 1) { return this; } long highestVersion = shards.get(0).version(); + long highestPrimaryTerm = shards.get(0).primaryTerm(); boolean requiresNormalization = false; for (int i = 1; i < shards.size(); i++) { - if (shards.get(i).version() != highestVersion) { + final long version = shards.get(i).version(); + final long primaryTerm = shards.get(i).primaryTerm(); + if (highestVersion != version || highestPrimaryTerm != primaryTerm) { requiresNormalization = true; } - if (shards.get(i).version() > highestVersion) { - highestVersion = shards.get(i).version(); - } + highestVersion = Math.max(highestVersion, version); + highestPrimaryTerm = Math.max(highestPrimaryTerm, primaryTerm); } if (!requiresNormalization) { return this; } List shardRoutings = new ArrayList<>(shards.size()); for (int i = 0; i < shards.size(); i++) { - if (shards.get(i).version() == highestVersion) { + if (shards.get(i).version() == highestVersion && shards.get(i).primaryTerm() == highestPrimaryTerm) { 
shardRoutings.add(shards.get(i)); } else { - shardRoutings.add(new ShardRouting(shards.get(i), highestVersion)); + shardRoutings.add(new ShardRouting(shards.get(i), highestVersion, highestPrimaryTerm)); } } return new IndexShardRoutingTable(shardId, Collections.unmodifiableList(shardRoutings)); diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java b/core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java index d5ed922b1206c..8eff3a3438272 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java @@ -609,6 +609,8 @@ public ShardRouting next() { /** * Initializes the current unassigned shard and moves it from the unassigned list. + * + * If a primary is initalized, it's term is incremented. */ public void initialize(String nodeId, long version, long expectedShardSize) { innerRemove(); diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/RoutingTable.java b/core/src/main/java/org/elasticsearch/cluster/routing/RoutingTable.java index d27c1c098ae67..59dd5fcd9aada 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/RoutingTable.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/RoutingTable.java @@ -22,7 +22,6 @@ import com.carrotsearch.hppc.IntSet; import com.carrotsearch.hppc.cursors.ObjectCursor; import com.carrotsearch.hppc.cursors.ObjectObjectCursor; - import org.elasticsearch.cluster.Diff; import org.elasticsearch.cluster.Diffable; import org.elasticsearch.cluster.DiffableUtils; @@ -35,12 +34,7 @@ import org.elasticsearch.index.IndexNotFoundException; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.function.Predicate; /** @@ -540,7 +534,7 @@ public RoutingTable build() { } // normalize the versions right 
before we build it... for (ObjectCursor indexRoutingTable : indicesRouting.values()) { - indicesRouting.put(indexRoutingTable.value.index(), indexRoutingTable.value.normalizeVersions()); + indicesRouting.put(indexRoutingTable.value.index(), indexRoutingTable.value.normalizeVersionsAndPrimaryTerms()); } RoutingTable table = new RoutingTable(version, indicesRouting.build()); indicesRouting = null; diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/ShardRouting.java b/core/src/main/java/org/elasticsearch/cluster/routing/ShardRouting.java index bc90241811a16..d5f9bd602e70e 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/ShardRouting.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/ShardRouting.java @@ -47,6 +47,7 @@ public final class ShardRouting implements Streamable, ToXContent { private String currentNodeId; private String relocatingNodeId; private boolean primary; + private long primaryTerm; private ShardRoutingState state; private long version; private RestoreSource restoreSource; @@ -62,11 +63,15 @@ private ShardRouting() { } public ShardRouting(ShardRouting copy) { - this(copy, copy.version()); + this(copy, copy.version(), copy.primaryTerm()); } public ShardRouting(ShardRouting copy, long version) { - this(copy.index(), copy.id(), copy.currentNodeId(), copy.relocatingNodeId(), copy.restoreSource(), copy.primary(), copy.state(), version, copy.unassignedInfo(), copy.allocationId(), true, copy.getExpectedShardSize()); + this(copy, version, copy.primaryTerm()); + } + + public ShardRouting(ShardRouting copy, long version, long primaryTerm) { + this(copy.index(), copy.id(), copy.currentNodeId(), copy.relocatingNodeId(), copy.restoreSource(), primaryTerm, copy.primary(), copy.state(), version, copy.unassignedInfo(), copy.allocationId(), true, copy.getExpectedShardSize()); } /** @@ -74,12 +79,13 @@ public ShardRouting(ShardRouting copy, long version) { * by either this class or tests. Visible for testing. 
*/ ShardRouting(String index, int shardId, String currentNodeId, - String relocatingNodeId, RestoreSource restoreSource, boolean primary, ShardRoutingState state, long version, + String relocatingNodeId, RestoreSource restoreSource, long primaryTerm, boolean primary, ShardRoutingState state, long version, UnassignedInfo unassignedInfo, AllocationId allocationId, boolean internal, long expectedShardSize) { this.index = index; this.shardId = shardId; this.currentNodeId = currentNodeId; this.relocatingNodeId = relocatingNodeId; + this.primaryTerm = primaryTerm; this.primary = primary; this.state = state; this.asList = Collections.singletonList(this); @@ -103,8 +109,8 @@ public ShardRouting(ShardRouting copy, long version) { /** * Creates a new unassigned shard. */ - public static ShardRouting newUnassigned(String index, int shardId, RestoreSource restoreSource, boolean primary, UnassignedInfo unassignedInfo) { - return new ShardRouting(index, shardId, null, null, restoreSource, primary, ShardRoutingState.UNASSIGNED, 0, unassignedInfo, null, true, UNAVAILABLE_EXPECTED_SHARD_SIZE); + public static ShardRouting newUnassigned(String index, int shardId, RestoreSource restoreSource, long primaryTerm, boolean primary, UnassignedInfo unassignedInfo) { + return new ShardRouting(index, shardId, null, null, restoreSource, primaryTerm, primary, ShardRoutingState.UNASSIGNED, 0, unassignedInfo, null, true, UNAVAILABLE_EXPECTED_SHARD_SIZE); } /** @@ -214,7 +220,7 @@ public String relocatingNodeId() { */ public ShardRouting buildTargetRelocatingShard() { assert relocating(); - return new ShardRouting(index, shardId, relocatingNodeId, currentNodeId, restoreSource, primary, ShardRoutingState.INITIALIZING, version, unassignedInfo, + return new ShardRouting(index, shardId, relocatingNodeId, currentNodeId, restoreSource, primaryTerm, primary, ShardRoutingState.INITIALIZING, version, unassignedInfo, AllocationId.newTargetRelocation(allocationId), true, expectedShardSize); } @@ -249,6 
+255,16 @@ public boolean primary() { return this.primary; } + /** + * Returns the term of the current primary shard for this shard. + * The term is incremented with every primary promotion/initial assignment. + * + * See {@link org.elasticsearch.cluster.metadata.IndexMetaData#primaryTerm(int)} for more info. + */ + public long primaryTerm() { + return this.primaryTerm; + } + /** * The shard state. */ @@ -318,6 +334,7 @@ public void readFromThin(StreamInput in) throws IOException { } primary = in.readBoolean(); + primaryTerm = in.readVLong(); state = ShardRoutingState.fromValue(in.readByte()); restoreSource = RestoreSource.readOptionalRestoreSource(in); @@ -363,6 +380,7 @@ public void writeToThin(StreamOutput out) throws IOException { } out.writeBoolean(primary); + out.writeVLong(primaryTerm); out.writeByte(state.value()); if (restoreSource != null) { @@ -420,7 +438,7 @@ void moveToUnassigned(UnassignedInfo unassignedInfo) { } /** - * Initializes an unassigned shard on a node. + * Initializes an unassigned shard on a node. If the shard is primary, its term is incremented. */ void initialize(String nodeId, long expectedShardSize) { ensureNotFrozen(); @@ -430,6 +448,9 @@ void initialize(String nodeId, long expectedShardSize) { state = ShardRoutingState.INITIALIZING; currentNodeId = nodeId; allocationId = AllocationId.newInitializing(); + if (primary) { + primaryTerm++; + } this.expectedShardSize = expectedShardSize; } @@ -507,6 +528,7 @@ void moveToPrimary() { throw new IllegalShardRoutingStateException(this, "Already primary, can't move to primary"); } primary = true; + primaryTerm++; } /** @@ -562,6 +584,9 @@ public boolean isRelocationTargetOf(ShardRouting other) { assert b == false || this.primary == other.primary : "ShardRouting is a relocation target but primary flag is different. This [" + this + "], target [" + other + "]"; + assert b == false || this.primaryTerm == other.primaryTerm : + "ShardRouting is a relocation target but primary term is different. 
This [" + this + "], target [" + other + "]"; + return b; } @@ -589,10 +614,13 @@ public boolean isRelocationSourceOf(ShardRouting other) { assert b == false || this.primary == other.primary : "ShardRouting is a relocation source but primary flag is different. This [" + this + "], target [" + other + "]"; + assert b == false || this.primaryTerm == other.primaryTerm : + "ShardRouting is a relocation source but primary term is different. This [" + this + "], target [" + other + "]"; + return b; } - /** returns true if the current routing is identical to the other routing in all but meta fields, i.e., version and unassigned info */ + /** returns true if the current routing is identical to the other routing in all but meta fields, i.e., version, primary term and unassigned info */ public boolean equalsIgnoringMetaData(ShardRouting other) { if (primary != other.primary) { return false; @@ -637,6 +665,9 @@ public boolean equals(Object o) { if (unassignedInfo != null ? !unassignedInfo.equals(that.unassignedInfo) : that.unassignedInfo != null) { return false; } + if (primaryTerm != that.primaryTerm) { + return false; + } return equalsIgnoringMetaData(that); } @@ -653,6 +684,7 @@ public int hashCode() { result = 31 * result + (currentNodeId != null ? currentNodeId.hashCode() : 0); result = 31 * result + (relocatingNodeId != null ? relocatingNodeId.hashCode() : 0); result = 31 * result + (primary ? 1 : 0); + result = 31 * result + Long.hashCode(primaryTerm); result = 31 * result + (state != null ? state.hashCode() : 0); result = 31 * result + Long.hashCode(version); result = 31 * result + (restoreSource != null ? 
restoreSource.hashCode() : 0); @@ -682,6 +714,7 @@ public String shortSummary() { sb.append("[R]"); } sb.append(", v[").append(version).append("]"); + sb.append(", t[").append(primaryTerm).append("]"); if (this.restoreSource != null) { sb.append(", restoring[" + restoreSource + "]"); } @@ -703,6 +736,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject() .field("state", state()) .field("primary", primary()) + .field("primary_term", primaryTerm()) .field("node", currentNodeId()) .field("relocating_node", relocatingNodeId()) .field("shard", shardId().id()) diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java index de3a763e205e5..d34b016e339e9 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java @@ -23,13 +23,9 @@ import org.elasticsearch.cluster.ClusterInfoService; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.routing.IndexRoutingTable; -import org.elasticsearch.cluster.routing.RoutingNode; -import org.elasticsearch.cluster.routing.RoutingNodes; -import org.elasticsearch.cluster.routing.RoutingTable; -import org.elasticsearch.cluster.routing.ShardRouting; -import org.elasticsearch.cluster.routing.UnassignedInfo; +import org.elasticsearch.cluster.routing.*; import org.elasticsearch.cluster.routing.allocation.allocator.ShardsAllocators; import org.elasticsearch.cluster.routing.allocation.command.AllocationCommands; import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; @@ -79,13 +75,70 @@ public RoutingAllocation.Result 
applyStartedShards(ClusterState clusterState, Li StartedRerouteAllocation allocation = new StartedRerouteAllocation(allocationDeciders, routingNodes, clusterState.nodes(), startedShards, clusterInfoService.getClusterInfo()); boolean changed = applyStartedShards(routingNodes, startedShards); if (!changed) { - return new RoutingAllocation.Result(false, clusterState.routingTable()); + return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData()); } shardsAllocators.applyStartedShards(allocation); if (withReroute) { reroute(allocation); } - return new RoutingAllocation.Result(true, new RoutingTable.Builder().updateNodes(routingNodes).build().validateRaiseException(clusterState.metaData())); + return buildChangedResult(clusterState.metaData(), routingNodes); + } + + + protected RoutingAllocation.Result buildChangedResult(MetaData metaData, RoutingNodes routingNodes) { + return buildChangedResult(metaData, routingNodes, new RoutingExplanations()); + + } + protected RoutingAllocation.Result buildChangedResult(MetaData metaData, RoutingNodes routingNodes, RoutingExplanations explanations) { + final RoutingTable routingTable = new RoutingTable.Builder().updateNodes(routingNodes).build(); + MetaData newMetaData = updateMetaDataWithRoutingTable(metaData,routingTable); + return new RoutingAllocation.Result(true, routingTable.validateRaiseException(newMetaData), newMetaData, explanations); + } + + /** + * Updates the current {@link MetaData} based on the newly created {@link RoutingTable}. + * + * @param currentMetaData {@link MetaData} object from before the routing table was changed. + * @param newRoutingTable new {@link RoutingTable} created by the allocation change + * @return adapted {@link MetaData}, potentially the original one if no change was needed. 
+ */ + static MetaData updateMetaDataWithRoutingTable(MetaData currentMetaData, RoutingTable newRoutingTable) { + // make sure index meta data and routing tables are in sync w.r.t primaryTerm + MetaData.Builder metaDataBuilder = null; + for (IndexRoutingTable indexRoutingTable : newRoutingTable) { + final IndexMetaData indexMetaData = currentMetaData.index(indexRoutingTable.getIndex()); + if (indexMetaData == null) { + throw new IllegalStateException("no metadata found for index [" + indexRoutingTable.index() + "]"); + } + IndexMetaData.Builder indexMetaDataBuilder = null; + for (IndexShardRoutingTable shardRoutings : indexRoutingTable) { + final ShardRouting primary = shardRoutings.primaryShard(); + if (primary == null) { + throw new IllegalStateException("missing primary shard for " + shardRoutings.shardId()); + } + final int shardId = primary.shardId().id(); + if (primary.primaryTerm() != indexMetaData.primaryTerm(shardId)) { + assert primary.primaryTerm() > indexMetaData.primaryTerm(shardId) : + "primary term should only increase. 
Index primary term [" + + indexMetaData.primaryTerm(shardId) + "] but primary routing is " + primary; + if (indexMetaDataBuilder == null) { + indexMetaDataBuilder = IndexMetaData.builder(indexMetaData); + } + indexMetaDataBuilder.primaryTerm(shardId, primary.primaryTerm()); + } + } + if (indexMetaDataBuilder != null) { + if (metaDataBuilder == null) { + metaDataBuilder = MetaData.builder(currentMetaData); + } + metaDataBuilder.put(indexMetaDataBuilder); + } + } + if (metaDataBuilder != null) { + return metaDataBuilder.build(); + } else { + return currentMetaData; + } } public RoutingAllocation.Result applyFailedShard(ClusterState clusterState, ShardRouting failedShard) { @@ -107,11 +160,11 @@ public RoutingAllocation.Result applyFailedShards(ClusterState clusterState, Lis changed |= applyFailedShard(allocation, failedShard.shard, true, new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, failedShard.message, failedShard.failure)); } if (!changed) { - return new RoutingAllocation.Result(false, clusterState.routingTable()); + return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData()); } shardsAllocators.applyFailedShards(allocation); reroute(allocation); - return new RoutingAllocation.Result(true, new RoutingTable.Builder().updateNodes(routingNodes).build().validateRaiseException(clusterState.metaData())); + return buildChangedResult(clusterState.metaData(), routingNodes); } public RoutingAllocation.Result reroute(ClusterState clusterState, AllocationCommands commands) { @@ -134,9 +187,12 @@ public RoutingAllocation.Result reroute(ClusterState clusterState, AllocationCom // the assumption is that commands will move / act on shards (or fail through exceptions) // so, there will always be shard "movements", so no need to check on reroute reroute(allocation); - return new RoutingAllocation.Result(true, new RoutingTable.Builder().updateNodes(routingNodes).build().validateRaiseException(clusterState.metaData()), explanations); 
+ + return buildChangedResult(clusterState.metaData(), routingNodes, explanations); } + + /** * Reroutes the routing table based on the live nodes. *

@@ -158,9 +214,9 @@ public RoutingAllocation.Result reroute(ClusterState clusterState, boolean debug RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState.nodes(), clusterInfoService.getClusterInfo()); allocation.debugDecision(debug); if (!reroute(allocation)) { - return new RoutingAllocation.Result(false, clusterState.routingTable()); + return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData()); } - return new RoutingAllocation.Result(true, new RoutingTable.Builder().updateNodes(routingNodes).build().validateRaiseException(clusterState.metaData())); + return buildChangedResult(clusterState.metaData(), routingNodes); } private boolean reroute(RoutingAllocation allocation) { diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java index 1874a7b020b6d..678b855841b31 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java @@ -52,29 +52,33 @@ public static class Result { private final RoutingTable routingTable; + private final MetaData metaData; + private RoutingExplanations explanations = new RoutingExplanations(); /** * Creates a new {@link RoutingAllocation.Result} - * - * @param changed a flag to determine whether the actual {@link RoutingTable} has been changed + * @param changed a flag to determine whether the actual {@link RoutingTable} has been changed + * @param routingTable the {@link RoutingTable} this Result references + * @param metaData the {@link MetaData} this Result references */ - public Result(boolean changed, RoutingTable routingTable) { + public Result(boolean changed, RoutingTable routingTable, MetaData metaData) { this.changed = changed; this.routingTable = routingTable; + this.metaData = metaData; } 
/** * Creates a new {@link RoutingAllocation.Result} - * - * @param changed a flag to determine whether the actual {@link RoutingTable} has been changed + * @param changed a flag to determine whether the actual {@link RoutingTable} has been changed * @param routingTable the {@link RoutingTable} this Result references + * @param metaData the {@link MetaData} this Result references * @param explanations Explanation for the reroute actions */ - public Result(boolean changed, RoutingTable routingTable, RoutingExplanations explanations) { + public Result(boolean changed, RoutingTable routingTable, MetaData metaData, RoutingExplanations explanations) { this.changed = changed; this.routingTable = routingTable; + this.metaData = metaData; this.explanations = explanations; } @@ -85,6 +89,14 @@ public boolean changed() { return this.changed; } + /** + * Get the {@link MetaData} referenced by this result + * @return referenced {@link MetaData} + */ + public MetaData metaData() { + return metaData; + } + /** * Get the {@link RoutingTable} referenced by this result * @return referenced {@link RoutingTable} diff --git a/core/src/test/java/org/elasticsearch/ExceptionSerializationTests.java b/core/src/test/java/org/elasticsearch/ExceptionSerializationTests.java index 55dc2e4211380..b65ab41e5b6e5 100644 --- a/core/src/test/java/org/elasticsearch/ExceptionSerializationTests.java +++ b/core/src/test/java/org/elasticsearch/ExceptionSerializationTests.java @@ -206,7 +206,7 @@ private T serialize(T exception) throws IOException { } public void testIllegalShardRoutingStateException() throws IOException { - final ShardRouting routing = TestShardRouting.newShardRouting("test", 0, "xyz", "def", false, ShardRoutingState.STARTED, 0); + final ShardRouting routing = TestShardRouting.newShardRouting("test", 0, "xyz", "def", 1, false, ShardRoutingState.STARTED, 0); final String routingAsString = routing.toString(); IllegalShardRoutingStateException serialize = serialize(new 
IllegalShardRoutingStateException(routing, "foo", new NullPointerException())); assertNotNull(serialize.shard()); diff --git a/core/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java b/core/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java index ba3b10e3db683..1baae96d8195a 100644 --- a/core/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java +++ b/core/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java @@ -131,11 +131,14 @@ private ShardRouting genShardRouting(String index, int shardId, boolean primary) switch (state) { case STARTED: - return TestShardRouting.newShardRouting(index, shardId, "node_" + Integer.toString(node_id++), null, null, primary, ShardRoutingState.STARTED, 1); + return TestShardRouting.newShardRouting(index, shardId, "node_" + Integer.toString(node_id++), null, null, 1, primary, + ShardRoutingState.STARTED, 1); case INITIALIZING: - return TestShardRouting.newShardRouting(index, shardId, "node_" + Integer.toString(node_id++), null, null, primary, ShardRoutingState.INITIALIZING, 1); + return TestShardRouting.newShardRouting(index, shardId, "node_" + Integer.toString(node_id++), null, null, 1, primary, + ShardRoutingState.INITIALIZING, 1); case RELOCATING: - return TestShardRouting.newShardRouting(index, shardId, "node_" + Integer.toString(node_id++), "node_" + Integer.toString(node_id++), null, primary, ShardRoutingState.RELOCATING, 1); + return TestShardRouting.newShardRouting(index, shardId, "node_" + Integer.toString(node_id++), + "node_" + Integer.toString(node_id++), null, 1, primary, ShardRoutingState.RELOCATING, 1); default: throw new ElasticsearchException("Unknown state: " + state.name()); } diff --git a/core/src/test/java/org/elasticsearch/action/support/broadcast/node/TransportBroadcastByNodeActionTests.java 
b/core/src/test/java/org/elasticsearch/action/support/broadcast/node/TransportBroadcastByNodeActionTests.java index 18a86b1dea0c2..4cdf1bd8f8e1e 100644 --- a/core/src/test/java/org/elasticsearch/action/support/broadcast/node/TransportBroadcastByNodeActionTests.java +++ b/core/src/test/java/org/elasticsearch/action/support/broadcast/node/TransportBroadcastByNodeActionTests.java @@ -35,16 +35,12 @@ import org.elasticsearch.cluster.block.ClusterBlockException; import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.block.ClusterBlocks; +import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; -import org.elasticsearch.cluster.routing.IndexRoutingTable; -import org.elasticsearch.cluster.routing.IndexShardRoutingTable; -import org.elasticsearch.cluster.routing.RoutingTable; -import org.elasticsearch.cluster.routing.ShardRouting; -import org.elasticsearch.cluster.routing.ShardRoutingState; -import org.elasticsearch.cluster.routing.ShardsIterator; -import org.elasticsearch.cluster.routing.TestShardRouting; +import org.elasticsearch.cluster.routing.*; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.DummyTransportAddress; @@ -63,12 +59,7 @@ import org.junit.BeforeClass; import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.function.Supplier; @@ -203,13 +194,16 @@ void setClusterState(TestClusterService clusterService, String index) { IndexRoutingTable.Builder 
indexRoutingTable = IndexRoutingTable.builder(index); int shardIndex = -1; + int totalIndexShards = 0; for (int i = 0; i < numberOfNodes; i++) { final DiscoveryNode node = newNode(i); discoBuilder = discoBuilder.put(node); int numberOfShards = randomIntBetween(1, 10); + totalIndexShards += numberOfShards; for (int j = 0; j < numberOfShards; j++) { final ShardId shardId = new ShardId(index, ++shardIndex); - ShardRouting shard = TestShardRouting.newShardRouting(index, shardId.getId(), node.id(), true, ShardRoutingState.STARTED, 1); + final int primaryTerm = randomInt(200); + ShardRouting shard = TestShardRouting.newShardRouting(index, shardId.getId(), node.id(), primaryTerm, true, ShardRoutingState.STARTED, 1); IndexShardRoutingTable.Builder indexShard = new IndexShardRoutingTable.Builder(shardId); indexShard.addShard(shard); indexRoutingTable.addIndexShard(indexShard.build()); @@ -219,6 +213,12 @@ void setClusterState(TestClusterService clusterService, String index) { discoBuilder.masterNodeId(newNode(numberOfNodes - 1).id()); ClusterState.Builder stateBuilder = ClusterState.builder(new ClusterName(TEST_CLUSTER)); stateBuilder.nodes(discoBuilder); + final IndexMetaData.Builder indexMetaData = IndexMetaData.builder(index) + .settings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)) + .numberOfReplicas(0) + .numberOfShards(totalIndexShards); + + stateBuilder.metaData(MetaData.builder().put(indexMetaData)); stateBuilder.routingTable(RoutingTable.builder().add(indexRoutingTable.build()).build()); ClusterState clusterState = stateBuilder.build(); clusterService.setState(clusterState); @@ -310,7 +310,7 @@ public void testOperationExecution() throws Exception { TransportResponse response = channel.getCapturedResponse(); assertTrue(response instanceof TransportBroadcastByNodeAction.NodeResponse); - TransportBroadcastByNodeAction.NodeResponse nodeResponse = (TransportBroadcastByNodeAction.NodeResponse)response; + 
TransportBroadcastByNodeAction.NodeResponse nodeResponse = (TransportBroadcastByNodeAction.NodeResponse) response; // check the operation was executed on the correct node assertEquals("node id", nodeId, nodeResponse.getNodeId()); diff --git a/core/src/test/java/org/elasticsearch/action/support/replication/BroadcastReplicationTests.java b/core/src/test/java/org/elasticsearch/action/support/replication/BroadcastReplicationTests.java index d31a024187c40..a7fc01e9677a6 100644 --- a/core/src/test/java/org/elasticsearch/action/support/replication/BroadcastReplicationTests.java +++ b/core/src/test/java/org/elasticsearch/action/support/replication/BroadcastReplicationTests.java @@ -131,7 +131,7 @@ public void testStartedPrimary() throws InterruptedException, ExecutionException public void testResultCombine() throws InterruptedException, ExecutionException, IOException { final String index = "test"; - int numShards = randomInt(3); + int numShards = 1 + randomInt(3); clusterService.setState(stateWithAssignedPrimariesAndOneReplica(index, numShards)); logger.debug("--> using initial state:\n{}", clusterService.state().prettyPrint()); Future response = (broadcastReplicationAction.execute(new BroadcastRequest().indices(index))); diff --git a/core/src/test/java/org/elasticsearch/action/support/replication/ClusterStateCreationUtils.java b/core/src/test/java/org/elasticsearch/action/support/replication/ClusterStateCreationUtils.java index 913d52d5b173e..fb7c59f353c66 100644 --- a/core/src/test/java/org/elasticsearch/action/support/replication/ClusterStateCreationUtils.java +++ b/core/src/test/java/org/elasticsearch/action/support/replication/ClusterStateCreationUtils.java @@ -40,12 +40,8 @@ import java.util.HashSet; import java.util.Set; -import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_CREATION_DATE; -import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; -import static 
org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; -import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_VERSION_CREATED; -import static org.elasticsearch.test.ESTestCase.randomFrom; -import static org.elasticsearch.test.ESTestCase.randomIntBetween; +import static org.elasticsearch.cluster.metadata.IndexMetaData.*; +import static org.elasticsearch.test.ESTestCase.*; /** * Helper methods for generating cluster states @@ -109,7 +105,9 @@ public static ClusterState state(String index, boolean primaryLocal, ShardRoutin } else { unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null); } - indexShardRoutingBuilder.addShard(TestShardRouting.newShardRouting(index, 0, primaryNode, relocatingNode, null, true, primaryState, 0, unassignedInfo)); + final int primaryTerm = randomInt(200); + indexShardRoutingBuilder.addShard(TestShardRouting.newShardRouting(index, 0, primaryNode, relocatingNode, null, primaryTerm, + true, primaryState, 0, unassignedInfo)); for (ShardRoutingState replicaState : replicaStates) { String replicaNode = null; @@ -125,7 +123,8 @@ public static ClusterState state(String index, boolean primaryLocal, ShardRoutin unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null); } indexShardRoutingBuilder.addShard( - TestShardRouting.newShardRouting(index, shardId.id(), replicaNode, relocatingNode, null, false, replicaState, 0, unassignedInfo)); + TestShardRouting.newShardRouting(index, shardId.id(), replicaNode, relocatingNode, null, primaryTerm, false, + replicaState, 0, unassignedInfo)); } ClusterState.Builder state = ClusterState.builder(new ClusterName("test")); @@ -150,19 +149,22 @@ public static ClusterState stateWithAssignedPrimariesAndOneReplica(String index, discoBuilder.masterNodeId(newNode(1).id()); // we need a non-local master to test shard failures IndexMetaData indexMetaData = IndexMetaData.builder(index).settings(Settings.builder() .put(SETTING_VERSION_CREATED, 
Version.CURRENT) - .put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 1) + .put(SETTING_NUMBER_OF_SHARDS, numberOfShards).put(SETTING_NUMBER_OF_REPLICAS, 1) .put(SETTING_CREATION_DATE, System.currentTimeMillis())).build(); ClusterState.Builder state = ClusterState.builder(new ClusterName("test")); state.nodes(discoBuilder); state.metaData(MetaData.builder().put(indexMetaData, false).generateClusterUuidIfNeeded()); IndexRoutingTable.Builder indexRoutingTableBuilder = IndexRoutingTable.builder(index); + final int primaryTerm = randomInt(200); for (int i = 0; i < numberOfShards; i++) { RoutingTable.Builder routing = new RoutingTable.Builder(); routing.addAsNew(indexMetaData); final ShardId shardId = new ShardId(index, i); IndexShardRoutingTable.Builder indexShardRoutingBuilder = new IndexShardRoutingTable.Builder(shardId); - indexShardRoutingBuilder.addShard(TestShardRouting.newShardRouting(index, i, newNode(0).id(), null, null, true, ShardRoutingState.STARTED, 0, null)); - indexShardRoutingBuilder.addShard(TestShardRouting.newShardRouting(index, i, newNode(1).id(), null, null, false, ShardRoutingState.STARTED, 0, null)); + indexShardRoutingBuilder.addShard(TestShardRouting.newShardRouting(index, i, newNode(0).id(), null, null, primaryTerm, true, + ShardRoutingState.STARTED, 0, null)); + indexShardRoutingBuilder.addShard(TestShardRouting.newShardRouting(index, i, newNode(1).id(), null, null, primaryTerm, false, + ShardRoutingState.STARTED, 0, null)); indexRoutingTableBuilder.addIndexShard(indexShardRoutingBuilder.build()); } state.routingTable(RoutingTable.builder().add(indexRoutingTableBuilder.build()).build()); diff --git a/core/src/test/java/org/elasticsearch/cluster/ClusterStateDiffIT.java b/core/src/test/java/org/elasticsearch/cluster/ClusterStateDiffIT.java index c2e646dde19ce..6258f7b5d095c 100644 --- a/core/src/test/java/org/elasticsearch/cluster/ClusterStateDiffIT.java +++ b/core/src/test/java/org/elasticsearch/cluster/ClusterStateDiffIT.java 
@@ -233,13 +233,14 @@ private IndexRoutingTable randomIndexRoutingTable(String index, String[] nodeIds for (int i = 0; i < shardCount; i++) { IndexShardRoutingTable.Builder indexShard = new IndexShardRoutingTable.Builder(new ShardId(index, i)); int replicaCount = randomIntBetween(1, 10); + int term = randomInt(200); for (int j = 0; j < replicaCount; j++) { UnassignedInfo unassignedInfo = null; if (randomInt(5) == 1) { unassignedInfo = new UnassignedInfo(randomReason(), randomAsciiOfLength(10)); } indexShard.addShard( - TestShardRouting.newShardRouting(index, i, randomFrom(nodeIds), null, null, j == 0, + TestShardRouting.newShardRouting(index, i, randomFrom(nodeIds), null, null, term, j == 0, ShardRoutingState.fromValue((byte) randomIntBetween(2, 4)), 1, unassignedInfo)); } builder.addIndexShard(indexShard.build()); diff --git a/core/src/test/java/org/elasticsearch/cluster/DiskUsageTests.java b/core/src/test/java/org/elasticsearch/cluster/DiskUsageTests.java index e48ca834f5333..9271bc96fb886 100644 --- a/core/src/test/java/org/elasticsearch/cluster/DiskUsageTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/DiskUsageTests.java @@ -49,19 +49,19 @@ public void testDiskUsageCalc() { // Test that DiskUsage handles invalid numbers, as reported by some // filesystems (ZFS & NTFS) - DiskUsage du2 = new DiskUsage("node1", "n1","random", 100, 101); + DiskUsage du2 = new DiskUsage("node1", "n1", "random", 100, 101); assertThat(du2.getFreeDiskAsPercentage(), equalTo(101.0)); assertThat(du2.getFreeBytes(), equalTo(101L)); assertThat(du2.getUsedBytes(), equalTo(-1L)); assertThat(du2.getTotalBytes(), equalTo(100L)); - DiskUsage du3 = new DiskUsage("node1", "n1", "random",-1, -1); + DiskUsage du3 = new DiskUsage("node1", "n1", "random", -1, -1); assertThat(du3.getFreeDiskAsPercentage(), equalTo(100.0)); assertThat(du3.getFreeBytes(), equalTo(-1L)); assertThat(du3.getUsedBytes(), equalTo(0L)); assertThat(du3.getTotalBytes(), equalTo(-1L)); - DiskUsage du4 = new 
DiskUsage("node1", "n1","random", 0, 0); + DiskUsage du4 = new DiskUsage("node1", "n1", "random", 0, 0); assertThat(du4.getFreeDiskAsPercentage(), equalTo(100.0)); assertThat(du4.getFreeBytes(), equalTo(0L)); assertThat(du4.getUsedBytes(), equalTo(0L)); @@ -91,21 +91,21 @@ public void testRandomDiskUsage() { } public void testFillShardLevelInfo() { - ShardRouting test_0 = ShardRouting.newUnassigned("test", 0, null, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); + ShardRouting test_0 = ShardRouting.newUnassigned("test", 0, null, 1, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); ShardRoutingHelper.initialize(test_0, "node1"); ShardRoutingHelper.moveToStarted(test_0); Path test0Path = createTempDir().resolve("indices").resolve("test").resolve("0"); CommonStats commonStats0 = new CommonStats(); commonStats0.store = new StoreStats(100, 1); - ShardRouting test_1 = ShardRouting.newUnassigned("test", 1, null, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); + ShardRouting test_1 = ShardRouting.newUnassigned("test", 1, null, 1, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); ShardRoutingHelper.initialize(test_1, "node2"); ShardRoutingHelper.moveToStarted(test_1); Path test1Path = createTempDir().resolve("indices").resolve("test").resolve("1"); CommonStats commonStats1 = new CommonStats(); commonStats1.store = new StoreStats(1000, 1); - ShardStats[] stats = new ShardStats[] { - new ShardStats(test_0, new ShardPath(false, test0Path, test0Path, "0xdeadbeef", test_0.shardId()), commonStats0 , null), - new ShardStats(test_1, new ShardPath(false, test1Path, test1Path, "0xdeadbeef", test_1.shardId()), commonStats1 , null) + ShardStats[] stats = new ShardStats[]{ + new ShardStats(test_0, new ShardPath(false, test0Path, test0Path, "0xdeadbeef", test_0.shardId()), commonStats0, null), + new ShardStats(test_1, new ShardPath(false, test1Path, test1Path, "0xdeadbeef", test_1.shardId()), 
commonStats1, null) }; ImmutableOpenMap.Builder shardSizes = ImmutableOpenMap.builder(); ImmutableOpenMap.Builder routingToPath = ImmutableOpenMap.builder(); @@ -131,21 +131,21 @@ public void testFillDiskUsage() { new FsInfo.Path("/least", "/dev/sdb", 200, 190, 70), new FsInfo.Path("/most", "/dev/sdc", 300, 290, 280), }; - FsInfo.Path[] node2FSInfo = new FsInfo.Path[] { + FsInfo.Path[] node2FSInfo = new FsInfo.Path[]{ new FsInfo.Path("/least_most", "/dev/sda", 100, 90, 80), }; - FsInfo.Path[] node3FSInfo = new FsInfo.Path[] { + FsInfo.Path[] node3FSInfo = new FsInfo.Path[]{ new FsInfo.Path("/least", "/dev/sda", 100, 90, 70), new FsInfo.Path("/most", "/dev/sda", 100, 90, 80), }; - NodeStats[] nodeStats = new NodeStats[] { + NodeStats[] nodeStats = new NodeStats[]{ new NodeStats(new DiscoveryNode("node_1", DummyTransportAddress.INSTANCE, Version.CURRENT), 0, - null,null,null,null,null,new FsInfo(0, node1FSInfo), null,null,null,null), + null, null, null, null, null, new FsInfo(0, node1FSInfo), null, null, null, null), new NodeStats(new DiscoveryNode("node_2", DummyTransportAddress.INSTANCE, Version.CURRENT), 0, - null,null,null,null,null, new FsInfo(0, node2FSInfo), null,null,null,null), + null, null, null, null, null, new FsInfo(0, node2FSInfo), null, null, null, null), new NodeStats(new DiscoveryNode("node_3", DummyTransportAddress.INSTANCE, Version.CURRENT), 0, - null,null,null,null,null, new FsInfo(0, node3FSInfo), null,null,null,null) + null, null, null, null, null, new FsInfo(0, node3FSInfo), null, null, null, null) }; InternalClusterInfoService.fillDiskUsagePerNode(logger, nodeStats, newLeastAvaiableUsages, newMostAvaiableUsages); DiskUsage leastNode_1 = newLeastAvaiableUsages.get("node_1"); diff --git a/core/src/test/java/org/elasticsearch/cluster/metadata/ToAndFromJsonMetaDataTests.java b/core/src/test/java/org/elasticsearch/cluster/metadata/ToAndFromJsonMetaDataTests.java index cbb5b7dfbdba6..9ac5fd6ad27a5 100644 --- 
a/core/src/test/java/org/elasticsearch/cluster/metadata/ToAndFromJsonMetaDataTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/metadata/ToAndFromJsonMetaDataTests.java @@ -41,11 +41,14 @@ public void testSimpleJsonFromAndTo() throws IOException { .put(IndexMetaData.builder("test1") .settings(settings(Version.CURRENT)) .numberOfShards(1) - .numberOfReplicas(2)) + .numberOfReplicas(2) + .primaryTerm(0, 1)) .put(IndexMetaData.builder("test2") .settings(settings(Version.CURRENT).put("setting1", "value1").put("setting2", "value2")) .numberOfShards(2) - .numberOfReplicas(3)) + .numberOfReplicas(3) + .primaryTerm(0, 2) + .primaryTerm(1, 2)) .put(IndexMetaData.builder("test3") .settings(settings(Version.CURRENT)) .numberOfShards(1) @@ -112,15 +115,15 @@ public void testSimpleJsonFromAndTo() throws IOException { .putAlias(newAliasMetaDataBuilder("alias1").filter(ALIAS_FILTER1)) .putAlias(newAliasMetaDataBuilder("alias2")) .putAlias(newAliasMetaDataBuilder("alias4").filter(ALIAS_FILTER2))) - .put(IndexTemplateMetaData.builder("foo") - .template("bar") - .order(1) - .settings(settingsBuilder() - .put("setting1", "value1") - .put("setting2", "value2")) - .putAlias(newAliasMetaDataBuilder("alias-bar1")) - .putAlias(newAliasMetaDataBuilder("alias-bar2").filter("{\"term\":{\"user\":\"kimchy\"}}")) - .putAlias(newAliasMetaDataBuilder("alias-bar3").routing("routing-bar"))) + .put(IndexTemplateMetaData.builder("foo") + .template("bar") + .order(1) + .settings(settingsBuilder() + .put("setting1", "value1") + .put("setting2", "value2")) + .putAlias(newAliasMetaDataBuilder("alias-bar1")) + .putAlias(newAliasMetaDataBuilder("alias-bar2").filter("{\"term\":{\"user\":\"kimchy\"}}")) + .putAlias(newAliasMetaDataBuilder("alias-bar3").routing("routing-bar"))) .put(IndexMetaData.builder("test12") .settings(settings(Version.CURRENT) .put("setting1", "value1") @@ -133,15 +136,15 @@ public void testSimpleJsonFromAndTo() throws IOException { 
.putAlias(newAliasMetaDataBuilder("alias1").filter(ALIAS_FILTER1)) .putAlias(newAliasMetaDataBuilder("alias2")) .putAlias(newAliasMetaDataBuilder("alias4").filter(ALIAS_FILTER2))) - .put(IndexTemplateMetaData.builder("foo") - .template("bar") - .order(1) - .settings(settingsBuilder() - .put("setting1", "value1") - .put("setting2", "value2")) - .putAlias(newAliasMetaDataBuilder("alias-bar1")) - .putAlias(newAliasMetaDataBuilder("alias-bar2").filter("{\"term\":{\"user\":\"kimchy\"}}")) - .putAlias(newAliasMetaDataBuilder("alias-bar3").routing("routing-bar"))) + .put(IndexTemplateMetaData.builder("foo") + .template("bar") + .order(1) + .settings(settingsBuilder() + .put("setting1", "value1") + .put("setting2", "value2")) + .putAlias(newAliasMetaDataBuilder("alias-bar1")) + .putAlias(newAliasMetaDataBuilder("alias-bar2").filter("{\"term\":{\"user\":\"kimchy\"}}")) + .putAlias(newAliasMetaDataBuilder("alias-bar3").routing("routing-bar"))) .build(); String metaDataSource = MetaData.Builder.toXContent(metaData); @@ -150,6 +153,7 @@ public void testSimpleJsonFromAndTo() throws IOException { MetaData parsedMetaData = MetaData.Builder.fromXContent(XContentFactory.xContent(XContentType.JSON).createParser(metaDataSource)); IndexMetaData indexMetaData = parsedMetaData.index("test1"); + assertThat(indexMetaData.primaryTerm(0), equalTo(1l)); assertThat(indexMetaData.getNumberOfShards(), equalTo(1)); assertThat(indexMetaData.getNumberOfReplicas(), equalTo(2)); assertThat(indexMetaData.getCreationDate(), equalTo(-1l)); @@ -159,6 +163,8 @@ public void testSimpleJsonFromAndTo() throws IOException { indexMetaData = parsedMetaData.index("test2"); assertThat(indexMetaData.getNumberOfShards(), equalTo(2)); assertThat(indexMetaData.getNumberOfReplicas(), equalTo(3)); + assertThat(indexMetaData.primaryTerm(0), equalTo(2l)); + assertThat(indexMetaData.primaryTerm(1), equalTo(2l)); assertThat(indexMetaData.getCreationDate(), equalTo(-1l)); 
assertThat(indexMetaData.getSettings().getAsMap().size(), equalTo(5)); assertThat(indexMetaData.getSettings().get("setting1"), equalTo("value1")); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/AllocationIdTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/AllocationIdTests.java index 8d6953ed9290e..1c5db44e4d2f4 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/AllocationIdTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/AllocationIdTests.java @@ -31,7 +31,7 @@ public class AllocationIdTests extends ESTestCase { public void testShardToStarted() { logger.info("-- create unassigned shard"); - ShardRouting shard = ShardRouting.newUnassigned("test", 0, null, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)); + ShardRouting shard = ShardRouting.newUnassigned("test", 0, null, 1, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)); assertThat(shard.allocationId(), nullValue()); logger.info("-- initialize the shard"); @@ -51,7 +51,7 @@ public void testShardToStarted() { public void testSuccessfulRelocation() { logger.info("-- build started shard"); - ShardRouting shard = ShardRouting.newUnassigned("test", 0, null, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)); + ShardRouting shard = ShardRouting.newUnassigned("test", 0, null, 1, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)); shard.initialize("node1", -1); shard.moveToStarted(); @@ -74,7 +74,7 @@ public void testSuccessfulRelocation() { public void testCancelRelocation() { logger.info("-- build started shard"); - ShardRouting shard = ShardRouting.newUnassigned("test", 0, null, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)); + ShardRouting shard = ShardRouting.newUnassigned("test", 0, null, 1, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)); shard.initialize("node1", -1); shard.moveToStarted(); @@ -94,7 +94,7 @@ public void 
testCancelRelocation() { public void testMoveToUnassigned() { logger.info("-- build started shard"); - ShardRouting shard = ShardRouting.newUnassigned("test", 0, null, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)); + ShardRouting shard = ShardRouting.newUnassigned("test", 0, null, 1, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)); shard.initialize("node1", -1); shard.moveToStarted(); @@ -105,7 +105,7 @@ public void testMoveToUnassigned() { public void testReinitializing() { logger.info("-- build started shard"); - ShardRouting shard = ShardRouting.newUnassigned("test", 0, null, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)); + ShardRouting shard = ShardRouting.newUnassigned("test", 0, null, 1, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)); shard.initialize("node1", -1); shard.moveToStarted(); AllocationId allocationId = shard.allocationId(); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/RoutingTableTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/RoutingTableTests.java index d7e54788c7da7..ed56206d8cb1a 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/RoutingTableTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/RoutingTableTests.java @@ -26,17 +26,17 @@ import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.node.DiscoveryNodes.Builder; import org.elasticsearch.cluster.routing.allocation.AllocationService; +import org.elasticsearch.cluster.routing.allocation.FailedRerouteAllocation; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexNotFoundException; import org.elasticsearch.test.ESAllocationTestCase; -import org.junit.Before; + +import java.util.*; import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING; import static 
org.elasticsearch.common.settings.Settings.settingsBuilder; -import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.is; -import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.*; public class RoutingTableTests extends ESAllocationTestCase { @@ -55,8 +55,10 @@ public class RoutingTableTests extends ESAllocationTestCase { .build()); private ClusterState clusterState; + private final Map primaryTermsPerIndex = new HashMap<>(); + private final Map versionsPerIndex = new HashMap<>(); + @Override - @Before public void setUp() throws Exception { super.setUp(); this.numberOfShards = randomIntBetween(1, 5); @@ -65,6 +67,7 @@ public void setUp() throws Exception { this.totalNumberOfShards = this.shardsPerIndex * 2; logger.info("Setup test with " + this.numberOfShards + " shards and " + this.numberOfReplicas + " replicas."); this.emptyRoutingTable = new RoutingTable.Builder().build(); + this.primaryTermsPerIndex.clear(); MetaData metaData = MetaData.builder() .put(createIndexMetaData(TEST_INDEX_1)) .put(createIndexMetaData(TEST_INDEX_2)) @@ -74,6 +77,10 @@ public void setUp() throws Exception { .add(new IndexRoutingTable.Builder(TEST_INDEX_1).initializeAsNew(metaData.index(TEST_INDEX_1)).build()) .add(new IndexRoutingTable.Builder(TEST_INDEX_2).initializeAsNew(metaData.index(TEST_INDEX_2)).build()) .build(); + this.versionsPerIndex.clear(); + this.versionsPerIndex.put(TEST_INDEX_1, new long[numberOfShards]); + this.versionsPerIndex.put(TEST_INDEX_2, new long[numberOfShards]); + this.clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT).metaData(metaData).routingTable(testRoutingTable).build(); } @@ -88,24 +95,105 @@ private void initPrimaries() { } this.clusterState = ClusterState.builder(clusterState).nodes(discoBuilder).build(); RoutingAllocation.Result rerouteResult = ALLOCATION_SERVICE.reroute(clusterState); - this.testRoutingTable = rerouteResult.routingTable(); 
assertThat(rerouteResult.changed(), is(true)); - this.clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); + applyRerouteResult(rerouteResult); + versionsPerIndex.keySet().forEach(this::incrementVersion); + primaryTermsPerIndex.keySet().forEach(this::incrementPrimaryTerm); + } + + private void incrementVersion(String index) { + final long[] versions = versionsPerIndex.get(index); + for (int i = 0; i < versions.length; i++) { + versions[i]++; + } + } + + private void incrementVersion(String index, int shard) { + versionsPerIndex.get(index)[shard]++; + } + + private void incrementPrimaryTerm(String index) { + final long[] primaryTerms = primaryTermsPerIndex.get(index); + for (int i = 0; i < primaryTerms.length; i++) { + primaryTerms[i]++; + } + } + + private void incrementPrimaryTerm(String index, int shard) { + primaryTermsPerIndex.get(index)[shard]++; } private void startInitializingShards(String index) { this.clusterState = ClusterState.builder(clusterState).routingTable(this.testRoutingTable).build(); logger.info("start primary shards for index " + index); RoutingAllocation.Result rerouteResult = ALLOCATION_SERVICE.applyStartedShards(this.clusterState, this.clusterState.getRoutingNodes().shardsWithState(index, INITIALIZING)); - this.clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); + // TODO: this simulate the code in InternalClusterState.UpdateTask.run() we should unify this. 
+ applyRerouteResult(rerouteResult); + incrementVersion(index); + } + + private void applyRerouteResult(RoutingAllocation.Result rerouteResult) { + ClusterState previousClusterState = this.clusterState; + ClusterState newClusterState = ClusterState.builder(previousClusterState).routingResult(rerouteResult).build(); + ClusterState.Builder builder = ClusterState.builder(newClusterState).incrementVersion(); + if (previousClusterState.routingTable() != newClusterState.routingTable()) { + builder.routingTable(RoutingTable.builder(newClusterState.routingTable()).version(newClusterState.routingTable().version() + 1).build()); + } + if (previousClusterState.metaData() != newClusterState.metaData()) { + builder.metaData(MetaData.builder(newClusterState.metaData()).version(newClusterState.metaData().version() + 1)); + } + this.clusterState = builder.build(); this.testRoutingTable = rerouteResult.routingTable(); } + private void failSomePrimaries(String index) { + this.clusterState = ClusterState.builder(clusterState).routingTable(this.testRoutingTable).build(); + final IndexRoutingTable indexShardRoutingTable = testRoutingTable.index(index); + Set shardIdsToFail = new HashSet<>(); + for (int i = 1 + randomInt(numberOfShards - 1); i > 0; i--) { + shardIdsToFail.add(randomInt(numberOfShards - 1)); + } + logger.info("failing primary shards {} for index [{}]", shardIdsToFail, index); + List failedShards = new ArrayList<>(); + for (int shard : shardIdsToFail) { + failedShards.add(new FailedRerouteAllocation.FailedShard(indexShardRoutingTable.shard(shard).primaryShard(), "test", null)); + incrementPrimaryTerm(index, shard); // the primary failure should increment the primary term; + incrementVersion(index, shard); // version is incremented once when the primary is unassigned + incrementVersion(index, shard); // and another time when the primary flag is set to false + } + RoutingAllocation.Result rerouteResult = ALLOCATION_SERVICE.applyFailedShards(this.clusterState, failedShards); 
+ applyRerouteResult(rerouteResult); + } + private IndexMetaData.Builder createIndexMetaData(String indexName) { - return new IndexMetaData.Builder(indexName) + primaryTermsPerIndex.put(indexName, new long[numberOfShards]); + final IndexMetaData.Builder builder = new IndexMetaData.Builder(indexName) .settings(DEFAULT_SETTINGS) .numberOfReplicas(this.numberOfReplicas) .numberOfShards(this.numberOfShards); + for (int i = 0; i < numberOfShards; i++) { + builder.primaryTerm(i, randomInt(200)); + primaryTermsPerIndex.get(indexName)[i] = builder.primaryTerm(i); + } + return builder; + } + + private void assertAllVersionAndPrimaryTerm() { + versionsPerIndex.keySet().forEach(this::assertVersionAndPrimaryTerm); + } + + private void assertVersionAndPrimaryTerm(String index) { + final long[] versions = versionsPerIndex.get(index); + final long[] terms = primaryTermsPerIndex.get(index); + final IndexMetaData indexMetaData = clusterState.metaData().index(index); + for (IndexShardRoutingTable shardRoutingTable : this.testRoutingTable.index(index)) { + final int shard = shardRoutingTable.shardId().id(); + for (ShardRouting routing : shardRoutingTable) { + assertThat("wrong version in " + routing, routing.version(), equalTo(versions[shard])); + assertThat("wrong primary term in " + routing, routing.primaryTerm(), equalTo(terms[shard])); + } + assertThat("primary term mismatch between indexMetaData of [" + index + "] and shard [" + shard + "]'s routing", indexMetaData.primaryTerm(shard), equalTo(terms[shard])); + } } public void testAllShards() { @@ -162,6 +250,27 @@ public void testShardsWithState() { assertThat(this.testRoutingTable.shardsWithState(ShardRoutingState.STARTED).size(), is(this.totalNumberOfShards)); } + public void testVersionAndPrimaryTermNormalization() { + assertAllVersionAndPrimaryTerm(); + + initPrimaries(); + assertAllVersionAndPrimaryTerm(); + + startInitializingShards(TEST_INDEX_1); + assertAllVersionAndPrimaryTerm(); + + 
startInitializingShards(TEST_INDEX_2); + assertAllVersionAndPrimaryTerm(); + + // now start all replicas too + startInitializingShards(TEST_INDEX_1); + startInitializingShards(TEST_INDEX_2); + assertAllVersionAndPrimaryTerm(); + + failSomePrimaries(TEST_INDEX_1); + assertAllVersionAndPrimaryTerm(); + } + public void testActivePrimaryShardsGrouped() { assertThat(this.emptyRoutingTable.activePrimaryShardsGrouped(new String[0], true).size(), is(0)); assertThat(this.emptyRoutingTable.activePrimaryShardsGrouped(new String[0], false).size(), is(0)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/ShardRoutingHelper.java b/core/src/test/java/org/elasticsearch/cluster/routing/ShardRoutingHelper.java index 2139cc29ae1ba..dfd7230b6a5e3 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/ShardRoutingHelper.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/ShardRoutingHelper.java @@ -53,6 +53,6 @@ public static void moveToUnassigned(ShardRouting routing, UnassignedInfo info) { } public static ShardRouting newWithRestoreSource(ShardRouting routing, RestoreSource restoreSource) { - return new ShardRouting(routing.index(), routing.shardId().id(), routing.currentNodeId(), routing.relocatingNodeId(), restoreSource, routing.primary(), routing.state(), routing.version(), routing.unassignedInfo(), routing.allocationId(), true, routing.getExpectedShardSize()); + return new ShardRouting(routing.index(), routing.shardId().id(), routing.currentNodeId(), routing.relocatingNodeId(), restoreSource, routing.primaryTerm(), routing.primary(), routing.state(), routing.version(), routing.unassignedInfo(), routing.allocationId(), true, routing.getExpectedShardSize()); } } diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/ShardRoutingTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/ShardRoutingTests.java index 146e80c766552..c547994b3fdc8 100644 --- 
a/core/src/test/java/org/elasticsearch/cluster/routing/ShardRoutingTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/ShardRoutingTests.java @@ -30,10 +30,13 @@ import java.io.IOException; +import static org.hamcrest.Matchers.equalTo; + public class ShardRoutingTests extends ESTestCase { public void testFrozenAfterRead() throws IOException { - ShardRouting routing = TestShardRouting.newShardRouting("foo", 1, "node_1", null, null, false, ShardRoutingState.INITIALIZING, 1); + long term = randomInt(200); + ShardRouting routing = TestShardRouting.newShardRouting("foo", 1, "node_1", null, null, term, false, ShardRoutingState.INITIALIZING, 1); routing.moveToPrimary(); assertTrue(routing.primary()); routing.moveFromPrimary(); @@ -49,11 +52,23 @@ public void testFrozenAfterRead() throws IOException { } } + public void testPrimaryTermIncrementOnPromotion() { + long term = randomInt(200); + ShardRouting routing = TestShardRouting.newShardRouting("foo", 1, "node_1", null, null, term, false, ShardRoutingState.STARTED, 1); + routing.moveToPrimary(); + assertTrue(routing.primary()); + assertThat(routing.primaryTerm(), equalTo(term + 1)); + routing.moveFromPrimary(); + assertFalse(routing.primary()); + assertThat(routing.primaryTerm(), equalTo(term + 1)); + } + public void testIsSameAllocation() { - ShardRouting unassignedShard0 = TestShardRouting.newShardRouting("test", 0, null, false, ShardRoutingState.UNASSIGNED, 1); - ShardRouting unassignedShard1 = TestShardRouting.newShardRouting("test", 1, null, false, ShardRoutingState.UNASSIGNED, 1); - ShardRouting initializingShard0 = TestShardRouting.newShardRouting("test", 0, "1", randomBoolean(), ShardRoutingState.INITIALIZING, 1); - ShardRouting initializingShard1 = TestShardRouting.newShardRouting("test", 1, "1", randomBoolean(), ShardRoutingState.INITIALIZING, 1); + long term = randomInt(200); + ShardRouting unassignedShard0 = TestShardRouting.newShardRouting("test", 0, null, term, false, 
ShardRoutingState.UNASSIGNED, 1); + ShardRouting unassignedShard1 = TestShardRouting.newShardRouting("test", 1, null, term, false, ShardRoutingState.UNASSIGNED, 1); + ShardRouting initializingShard0 = TestShardRouting.newShardRouting("test", 0, "1", term, randomBoolean(), ShardRoutingState.INITIALIZING, 1); + ShardRouting initializingShard1 = TestShardRouting.newShardRouting("test", 1, "1", term, randomBoolean(), ShardRoutingState.INITIALIZING, 1); ShardRouting startedShard0 = new ShardRouting(initializingShard0); startedShard0.moveToStarted(); ShardRouting startedShard1 = new ShardRouting(initializingShard1); @@ -91,13 +106,14 @@ public void testIsSameShard() { private ShardRouting randomShardRouting(String index, int shard) { ShardRoutingState state = randomFrom(ShardRoutingState.values()); - return TestShardRouting.newShardRouting(index, shard, state == ShardRoutingState.UNASSIGNED ? null : "1", state != ShardRoutingState.UNASSIGNED && randomBoolean(), state, randomInt(5)); + return TestShardRouting.newShardRouting(index, shard, state == ShardRoutingState.UNASSIGNED ? 
null : "1", randomInt(200), + state != ShardRoutingState.UNASSIGNED && randomBoolean(), state, randomInt(5)); } public void testIsSourceTargetRelocation() { - ShardRouting unassignedShard0 = TestShardRouting.newShardRouting("test", 0, null, false, ShardRoutingState.UNASSIGNED, 1); - ShardRouting initializingShard0 = TestShardRouting.newShardRouting("test", 0, "node1", randomBoolean(), ShardRoutingState.INITIALIZING, 1); - ShardRouting initializingShard1 = TestShardRouting.newShardRouting("test", 1, "node1", randomBoolean(), ShardRoutingState.INITIALIZING, 1); + ShardRouting unassignedShard0 = TestShardRouting.newShardRouting("test", 0, null, randomInt(200), false, ShardRoutingState.UNASSIGNED, 1); + ShardRouting initializingShard0 = TestShardRouting.newShardRouting("test", 0, "node1", randomInt(200), randomBoolean(), ShardRoutingState.INITIALIZING, 1); + ShardRouting initializingShard1 = TestShardRouting.newShardRouting("test", 1, "node1", randomInt(200), randomBoolean(), ShardRoutingState.INITIALIZING, 1); ShardRouting startedShard0 = new ShardRouting(initializingShard0); startedShard0.moveToStarted(); ShardRouting startedShard1 = new ShardRouting(initializingShard1); @@ -139,13 +155,14 @@ public void testIsSourceTargetRelocation() { assertFalse(startedShard0.isRelocationSourceOf(sourceShard0a)); } - public void testEqualsIgnoringVersion() { + public void testEqualsIgnoringMetaData() { ShardRouting routing = randomShardRouting("test", 0); ShardRouting otherRouting = new ShardRouting(routing); - assertTrue("expected equality\nthis " + routing + ",\nother " + otherRouting, routing.equalsIgnoringMetaData(otherRouting)); - otherRouting = new ShardRouting(routing, 1); + otherRouting = new ShardRouting(routing, + randomBoolean() ? routing.version() : routing.version() + 1, + randomBoolean() ? 
routing.primaryTerm() : routing.primaryTerm() + 1); assertTrue("expected equality\nthis " + routing + ",\nother " + otherRouting, routing.equalsIgnoringMetaData(otherRouting)); @@ -155,36 +172,42 @@ public void testEqualsIgnoringVersion() { switch (changeId) { case 0: // change index - otherRouting = TestShardRouting.newShardRouting(otherRouting.index() + "a", otherRouting.id(), otherRouting.currentNodeId(), otherRouting.relocatingNodeId(), - otherRouting.restoreSource(), otherRouting.primary(), otherRouting.state(), otherRouting.version(), otherRouting.unassignedInfo()); + otherRouting = TestShardRouting.newShardRouting(otherRouting.index() + "a", otherRouting.id(), otherRouting.currentNodeId(), + otherRouting.relocatingNodeId(), otherRouting.restoreSource(), otherRouting.primaryTerm(), otherRouting.primary(), + otherRouting.state(), otherRouting.version(), otherRouting.unassignedInfo()); break; case 1: // change shard id - otherRouting = TestShardRouting.newShardRouting(otherRouting.index(), otherRouting.id() + 1, otherRouting.currentNodeId(), otherRouting.relocatingNodeId(), - otherRouting.restoreSource(), otherRouting.primary(), otherRouting.state(), otherRouting.version(), otherRouting.unassignedInfo()); + otherRouting = TestShardRouting.newShardRouting(otherRouting.index(), otherRouting.id() + 1, otherRouting.currentNodeId(), + otherRouting.relocatingNodeId(), otherRouting.restoreSource(), otherRouting.primaryTerm(), otherRouting.primary(), + otherRouting.state(), otherRouting.version(), otherRouting.unassignedInfo()); break; case 2: // change current node otherRouting = TestShardRouting.newShardRouting(otherRouting.index(), otherRouting.id(), otherRouting.currentNodeId() == null ? 
"1" : otherRouting.currentNodeId() + "_1", otherRouting.relocatingNodeId(), - otherRouting.restoreSource(), otherRouting.primary(), otherRouting.state(), otherRouting.version(), otherRouting.unassignedInfo()); + otherRouting.restoreSource(), otherRouting.primaryTerm(), otherRouting.primary(), otherRouting.state(), + otherRouting.version(), otherRouting.unassignedInfo()); break; case 3: // change relocating node otherRouting = TestShardRouting.newShardRouting(otherRouting.index(), otherRouting.id(), otherRouting.currentNodeId(), otherRouting.relocatingNodeId() == null ? "1" : otherRouting.relocatingNodeId() + "_1", - otherRouting.restoreSource(), otherRouting.primary(), otherRouting.state(), otherRouting.version(), otherRouting.unassignedInfo()); + otherRouting.restoreSource(), otherRouting.primaryTerm(), otherRouting.primary(), otherRouting.state(), + otherRouting.version(), otherRouting.unassignedInfo()); break; case 4: // change restore source otherRouting = TestShardRouting.newShardRouting(otherRouting.index(), otherRouting.id(), otherRouting.currentNodeId(), otherRouting.relocatingNodeId(), otherRouting.restoreSource() == null ? 
new RestoreSource(new SnapshotId("test", "s1"), Version.CURRENT, "test") : new RestoreSource(otherRouting.restoreSource().snapshotId(), Version.CURRENT, otherRouting.index() + "_1"), - otherRouting.primary(), otherRouting.state(), otherRouting.version(), otherRouting.unassignedInfo()); + otherRouting.primaryTerm(), otherRouting.primary(), otherRouting.state(), otherRouting.version(), + otherRouting.unassignedInfo()); break; case 5: // change primary flag - otherRouting = TestShardRouting.newShardRouting(otherRouting.index(), otherRouting.id(), otherRouting.currentNodeId(), otherRouting.relocatingNodeId(), - otherRouting.restoreSource(), otherRouting.primary() == false, otherRouting.state(), otherRouting.version(), otherRouting.unassignedInfo()); + otherRouting = TestShardRouting.newShardRouting(otherRouting.index(), otherRouting.id(), otherRouting.currentNodeId(), + otherRouting.relocatingNodeId(), otherRouting.restoreSource(), otherRouting.primaryTerm(), + otherRouting.primary() == false, otherRouting.state(), otherRouting.version(), otherRouting.unassignedInfo()); break; case 6: // change state @@ -198,20 +221,26 @@ public void testEqualsIgnoringVersion() { unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "test"); } - otherRouting = TestShardRouting.newShardRouting(otherRouting.index(), otherRouting.id(), otherRouting.currentNodeId(), otherRouting.relocatingNodeId(), - otherRouting.restoreSource(), otherRouting.primary(), newState, otherRouting.version(), unassignedInfo); + otherRouting = TestShardRouting.newShardRouting(otherRouting.index(), otherRouting.id(), otherRouting.currentNodeId(), + otherRouting.relocatingNodeId(), otherRouting.restoreSource(), otherRouting.primaryTerm(), otherRouting.primary(), + newState, otherRouting.version(), unassignedInfo); break; } if (randomBoolean()) { // change version - otherRouting = new ShardRouting(otherRouting, otherRouting.version() + 1); + otherRouting = new ShardRouting(otherRouting, 
otherRouting.version() + 1, otherRouting.primaryTerm()); + } + if (randomBoolean()) { + // increase term + otherRouting = new ShardRouting(otherRouting, otherRouting.version(), otherRouting.primaryTerm() + 1); } if (randomBoolean()) { // change unassigned info - otherRouting = TestShardRouting.newShardRouting(otherRouting.index(), otherRouting.id(), otherRouting.currentNodeId(), otherRouting.relocatingNodeId(), - otherRouting.restoreSource(), otherRouting.primary(), otherRouting.state(), otherRouting.version(), + otherRouting = TestShardRouting.newShardRouting(otherRouting.index(), otherRouting.id(), otherRouting.currentNodeId(), + otherRouting.relocatingNodeId(), otherRouting.restoreSource(), otherRouting.primaryTerm(), otherRouting.primary(), + otherRouting.state(), otherRouting.version(), otherRouting.unassignedInfo() == null ? new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "test") : new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, otherRouting.unassignedInfo().getMessage() + "_1")); } diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/TestShardRouting.java b/core/src/test/java/org/elasticsearch/cluster/routing/TestShardRouting.java index df9e1f8af24ef..8a173ca4393d8 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/TestShardRouting.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/TestShardRouting.java @@ -27,26 +27,31 @@ */ public class TestShardRouting { - public static ShardRouting newShardRouting(String index, int shardId, String currentNodeId, boolean primary, ShardRoutingState state, long version) { - return new ShardRouting(index, shardId, currentNodeId, null, null, primary, state, version, buildUnassignedInfo(state), buildAllocationId(state), true, -1); + public static ShardRouting newShardRouting(String index, int shardId, String currentNodeId, long primaryTerm, boolean primary, + ShardRoutingState state, long version) { + return new ShardRouting(index, shardId, currentNodeId, null, null, 
primaryTerm, primary, state, version, buildUnassignedInfo(state), buildAllocationId(state), true, -1); } - public static ShardRouting newShardRouting(String index, int shardId, String currentNodeId, String relocatingNodeId, boolean primary, ShardRoutingState state, long version) { - return new ShardRouting(index, shardId, currentNodeId, relocatingNodeId, null, primary, state, version, buildUnassignedInfo(state), buildAllocationId(state), true, -1); + public static ShardRouting newShardRouting(String index, int shardId, String currentNodeId, String relocatingNodeId, + long primaryTerm, boolean primary, ShardRoutingState state, long version) { + return new ShardRouting(index, shardId, currentNodeId, relocatingNodeId, null, primaryTerm, primary, state, version, buildUnassignedInfo(state), buildAllocationId(state), true, -1); } - public static ShardRouting newShardRouting(String index, int shardId, String currentNodeId, String relocatingNodeId, boolean primary, ShardRoutingState state, AllocationId allocationId, long version) { - return new ShardRouting(index, shardId, currentNodeId, relocatingNodeId, null, primary, state, version, buildUnassignedInfo(state), allocationId, true, -1); + public static ShardRouting newShardRouting(String index, int shardId, String currentNodeId, String relocatingNodeId, + long primaryTerm, boolean primary, ShardRoutingState state, AllocationId allocationId, long version) { + return new ShardRouting(index, shardId, currentNodeId, relocatingNodeId, null, primaryTerm, primary, state, version, buildUnassignedInfo(state), allocationId, true, -1); } - public static ShardRouting newShardRouting(String index, int shardId, String currentNodeId, String relocatingNodeId, RestoreSource restoreSource, boolean primary, ShardRoutingState state, long version) { - return new ShardRouting(index, shardId, currentNodeId, relocatingNodeId, restoreSource, primary, state, version, buildUnassignedInfo(state), buildAllocationId(state), true, -1); + public static 
ShardRouting newShardRouting(String index, int shardId, String currentNodeId, String relocatingNodeId, + RestoreSource restoreSource, long primaryTerm, boolean primary, + ShardRoutingState state, long version) { + return new ShardRouting(index, shardId, currentNodeId, relocatingNodeId, restoreSource, primaryTerm, primary, state, version, buildUnassignedInfo(state), buildAllocationId(state), true, -1); } public static ShardRouting newShardRouting(String index, int shardId, String currentNodeId, - String relocatingNodeId, RestoreSource restoreSource, boolean primary, ShardRoutingState state, long version, - UnassignedInfo unassignedInfo) { - return new ShardRouting(index, shardId, currentNodeId, relocatingNodeId, restoreSource, primary, state, version, unassignedInfo, buildAllocationId(state), true, -1); + String relocatingNodeId, RestoreSource restoreSource, long primaryTerm, boolean primary, + ShardRoutingState state, long version, UnassignedInfo unassignedInfo) { + return new ShardRouting(index, shardId, currentNodeId, relocatingNodeId, restoreSource, primaryTerm, primary, state, version, unassignedInfo, buildAllocationId(state), true, -1); } private static AllocationId buildAllocationId(ShardRoutingState state) { diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java index bd991303ea33f..cdbff4c5f5035 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java @@ -185,7 +185,7 @@ public void testReplicaAdded() { * The unassigned meta is kept when a shard goes to INITIALIZING, but cleared when it moves to STARTED. 
*/ public void testStateTransitionMetaHandling() { - ShardRouting shard = TestShardRouting.newShardRouting("test", 1, null, null, null, true, ShardRoutingState.UNASSIGNED, 1, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)); + ShardRouting shard = TestShardRouting.newShardRouting("test", 1, null, null, null, 1, true, ShardRoutingState.UNASSIGNED, 1, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)); ShardRouting mutable = new ShardRouting(shard); assertThat(mutable.unassignedInfo(), notNullValue()); mutable.initialize("test_node", -1); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/CatAllocationTestCase.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/CatAllocationTestCase.java index 022e0cca3ae8b..e4c0dc3705f77 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/CatAllocationTestCase.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/CatAllocationTestCase.java @@ -82,7 +82,7 @@ public void testRun() throws IOException { ShardRoutingState state = ShardRoutingState.valueOf(matcher.group(4)); String ip = matcher.group(5); nodes.add(ip); - ShardRouting routing = TestShardRouting.newShardRouting(index, shard, ip, null, null, primary, state, 1); + ShardRouting routing = TestShardRouting.newShardRouting(index, shard, ip, null, null, 1, primary, state, 1); idx.add(routing); logger.debug("Add routing {}", routing); } else { diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/PrimaryElectionRoutingTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/PrimaryElectionRoutingTests.java index 119348a9762e4..f4d0c095263e6 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/PrimaryElectionRoutingTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/PrimaryElectionRoutingTests.java @@ -58,29 +58,31 @@ public void 
testBackupElectionToPrimaryWhenPrimaryCanBeAllocatedToAnotherNode() ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT).metaData(metaData).routingTable(routingTable).build(); logger.info("Adding two nodes and performing rerouting"); - clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))).build(); - RoutingTable prevRoutingTable = routingTable; - routingTable = strategy.reroute(clusterState).routingTable(); - clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1"))).build(); + RoutingAllocation.Result result = strategy.reroute(clusterState); + clusterState = ClusterState.builder(clusterState).routingResult(result).build(); + + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node2"))).build(); + result = strategy.reroute(clusterState); + clusterState = ClusterState.builder(clusterState).routingResult(result).build(); logger.info("Start the primary shard (on node1)"); RoutingNodes routingNodes = clusterState.getRoutingNodes(); - prevRoutingTable = routingTable; - routingTable = strategy.applyStartedShards(clusterState, routingNodes.node("node1").shardsWithState(INITIALIZING)).routingTable(); - clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + result = strategy.applyStartedShards(clusterState, routingNodes.node("node1").shardsWithState(INITIALIZING)); + clusterState = ClusterState.builder(clusterState).routingResult(result).build(); logger.info("Start the backup shard (on node2)"); routingNodes = clusterState.getRoutingNodes(); - prevRoutingTable = routingTable; - routingTable = strategy.applyStartedShards(clusterState, routingNodes.node("node2").shardsWithState(INITIALIZING)).routingTable(); - clusterState = 
ClusterState.builder(clusterState).routingTable(routingTable).build(); + result = strategy.applyStartedShards(clusterState, routingNodes.node("node2").shardsWithState(INITIALIZING)); + clusterState = ClusterState.builder(clusterState).routingResult(result).build(); logger.info("Adding third node and reroute and kill first node"); clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3")).remove("node1")).build(); - prevRoutingTable = routingTable; - routingTable = strategy.reroute(clusterState).routingTable(); - clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + RoutingTable prevRoutingTable = clusterState.routingTable(); + result = strategy.reroute(clusterState); + clusterState = ClusterState.builder(clusterState).routingResult(result).build(); routingNodes = clusterState.getRoutingNodes(); + routingTable = clusterState.routingTable(); assertThat(prevRoutingTable != routingTable, equalTo(true)); assertThat(routingTable.index("test").shards().size(), equalTo(1)); @@ -89,6 +91,7 @@ public void testBackupElectionToPrimaryWhenPrimaryCanBeAllocatedToAnotherNode() assertThat(routingNodes.node("node3").numberOfShardsWithState(INITIALIZING), equalTo(1)); // verify where the primary is assertThat(routingTable.index("test").shard(0).primaryShard().currentNodeId(), equalTo("node2")); + assertThat(routingTable.index("test").shard(0).primaryShard().primaryTerm(), equalTo(2l)); assertThat(routingTable.index("test").shard(0).replicaShards().get(0).currentNodeId(), equalTo("node3")); } @@ -115,7 +118,7 @@ public void testRemovingInitializingReplicasIfPrimariesFails() { logger.info("Start the primary shards"); RoutingNodes routingNodes = clusterState.getRoutingNodes(); rerouteResult = allocation.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)); - clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); + 
clusterState = ClusterState.builder(clusterState).routingResult(rerouteResult).build(); routingNodes = clusterState.getRoutingNodes(); assertThat(routingNodes.shardsWithState(STARTED).size(), equalTo(2)); @@ -129,12 +132,13 @@ public void testRemovingInitializingReplicasIfPrimariesFails() { .put(newNode(nodeIdRemaining)) ).build(); rerouteResult = allocation.reroute(clusterState); - clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); + clusterState = ClusterState.builder(clusterState).routingResult(rerouteResult).build(); routingNodes = clusterState.getRoutingNodes(); assertThat(routingNodes.shardsWithState(STARTED).size(), equalTo(1)); assertThat(routingNodes.shardsWithState(INITIALIZING).size(), equalTo(1)); assertThat(routingNodes.node(nodeIdRemaining).shardsWithState(INITIALIZING).get(0).primary(), equalTo(true)); + assertThat(routingNodes.node(nodeIdRemaining).shardsWithState(INITIALIZING).get(0).primaryTerm(), equalTo(2l)); } } diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardStateIT.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardStateIT.java new file mode 100644 index 0000000000000..3a413a6245e5a --- /dev/null +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardStateIT.java @@ -0,0 +1,70 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.cluster.routing.allocation; + +import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.indices.IndicesService; +import org.elasticsearch.test.ESIntegTestCase; + +import static org.hamcrest.Matchers.equalTo; + +public class ShardStateIT extends ESIntegTestCase { + + public void testPrimaryFailureIncreasesTerm() throws Exception { + internalCluster().ensureAtLeastNumDataNodes(2); + prepareCreate("test").setSettings(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 2, IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1).get(); + ensureGreen(); + assertPrimaryTerms(1, 1); + + logger.info("--> disabling allocation to capture shard failure"); + disableAllocation("test"); + + ClusterState state = client().admin().cluster().prepareState().get().getState(); + final int shard = randomBoolean() ? 
0 : 1; + final String nodeId = state.routingTable().index("test").shard(shard).primaryShard().currentNodeId(); + final String node = state.nodes().get(nodeId).name(); + logger.info("--> failing primary of [{}] on node [{}]", shard, node); + IndicesService indicesService = internalCluster().getInstance(IndicesService.class, node); + indicesService.indexService("test").getShard(shard).failShard("simulated test failure", null); + + logger.info("--> waiting for a yellow index"); + assertBusy(() -> assertThat(client().admin().cluster().prepareHealth().get().getStatus(), equalTo(ClusterHealthStatus.YELLOW))); + + final long term0 = shard == 0 ? 2 : 1; + final long term1 = shard == 1 ? 2 : 1; + assertPrimaryTerms(term0, term1); + + logger.info("--> enabling allocation"); + enableAllocation("test"); + ensureGreen(); + assertPrimaryTerms(term0, term1); + } + + protected void assertPrimaryTerms(long term0, long term1) { + for (String node : internalCluster().getNodeNames()) { + logger.debug("--> asserting primary terms on [{}]", node); + ClusterState state = client(node).admin().cluster().prepareState().setLocal(true).get().getState(); + IndexMetaData metaData = state.metaData().index("test"); + assertThat(metaData.primaryTerm(0), equalTo(term0)); + assertThat(metaData.primaryTerm(1), equalTo(term1)); + } + } +} diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/StartedShardsRoutingTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/StartedShardsRoutingTests.java index 28033915abe1d..42a3ffedf0fcf 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/StartedShardsRoutingTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/StartedShardsRoutingTests.java @@ -24,19 +24,14 @@ import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.node.DiscoveryNodes; -import
org.elasticsearch.cluster.routing.AllocationId; -import org.elasticsearch.cluster.routing.IndexRoutingTable; -import org.elasticsearch.cluster.routing.IndexShardRoutingTable; -import org.elasticsearch.cluster.routing.RoutingTable; -import org.elasticsearch.cluster.routing.ShardRouting; -import org.elasticsearch.cluster.routing.ShardRoutingState; -import org.elasticsearch.cluster.routing.TestShardRouting; +import org.elasticsearch.cluster.routing.*; +import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.elasticsearch.test.ESAllocationTestCase; import java.util.Arrays; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.*; + public class StartedShardsRoutingTests extends ESAllocationTestCase { public void testStartedShardsMatching() { @@ -44,59 +39,92 @@ public void testStartedShardsMatching() { logger.info("--> building initial cluster state"); final IndexMetaData indexMetaData = IndexMetaData.builder("test") - .settings(settings(Version.CURRENT)) - .numberOfShards(3).numberOfReplicas(0) + .settings(settings(Version.CURRENT).put(EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE, "none")) + .numberOfShards(3).numberOfReplicas(1) .build(); ClusterState.Builder stateBuilder = ClusterState.builder(ClusterName.DEFAULT) - .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))) + .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")).put(newNode("node3"))) .metaData(MetaData.builder().put(indexMetaData, false)); - final ShardRouting initShard = TestShardRouting.newShardRouting("test", 0, "node1", randomBoolean(), ShardRoutingState.INITIALIZING, 1); - final ShardRouting startedShard = TestShardRouting.newShardRouting("test", 1, "node2", randomBoolean(), ShardRoutingState.STARTED, 1); - final ShardRouting relocatingShard = TestShardRouting.newShardRouting("test", 2, "node1", "node2", randomBoolean(), 
ShardRoutingState.RELOCATING, 1); - stateBuilder.routingTable(RoutingTable.builder().add(IndexRoutingTable.builder("test") - .addIndexShard(new IndexShardRoutingTable.Builder(initShard.shardId()).addShard(initShard).build()) - .addIndexShard(new IndexShardRoutingTable.Builder(startedShard.shardId()).addShard(startedShard).build()) - .addIndexShard(new IndexShardRoutingTable.Builder(relocatingShard.shardId()).addShard(relocatingShard).build())).build()); + final ShardRouting initShard; + final ShardRouting startedShard; + final ShardRouting relocatingShard; + final IndexRoutingTable.Builder indexRoutingTable = IndexRoutingTable.builder("test"); + if (randomBoolean()) { + initShard = TestShardRouting.newShardRouting("test", 0, "node1", 1, true, ShardRoutingState.INITIALIZING, 1); + ShardRouting replica = TestShardRouting.newShardRouting("test", 0, null, 1, false, ShardRoutingState.UNASSIGNED, 1); + indexRoutingTable.addIndexShard(new IndexShardRoutingTable.Builder(initShard.shardId()).addShard(initShard).addShard(replica).build()); + } else { + ShardRouting primaryShard = TestShardRouting.newShardRouting("test", 0, "node2", 1, true, ShardRoutingState.STARTED, 1); + initShard = TestShardRouting.newShardRouting("test", 0, "node1", 1, false, ShardRoutingState.INITIALIZING, 1); + indexRoutingTable.addIndexShard(new IndexShardRoutingTable.Builder(initShard.shardId()).addShard(primaryShard).addShard(initShard).build()); + } + if (randomBoolean()) { + startedShard = TestShardRouting.newShardRouting("test", 1, "node2", 1, true, ShardRoutingState.STARTED, 1); + ShardRouting replica = TestShardRouting.newShardRouting("test", 1, null, 1, false, ShardRoutingState.UNASSIGNED, 1); + indexRoutingTable.addIndexShard(new IndexShardRoutingTable.Builder(startedShard.shardId()).addShard(startedShard).addShard(replica).build()); + } else { + ShardRouting primaryShard = TestShardRouting.newShardRouting("test", 1, "node1", 1, true, ShardRoutingState.STARTED, 1); + startedShard = 
TestShardRouting.newShardRouting("test", 1, "node2", 1, false, ShardRoutingState.STARTED, 1); + indexRoutingTable.addIndexShard(new IndexShardRoutingTable.Builder(startedShard.shardId()).addShard(primaryShard).addShard(startedShard).build()); + } + + if (randomBoolean()) { + relocatingShard = TestShardRouting.newShardRouting("test", 2, "node1", "node2", 1, true, ShardRoutingState.RELOCATING, 1); + ShardRouting replica = TestShardRouting.newShardRouting("test", 2, null, 1, false, ShardRoutingState.UNASSIGNED, 1); + indexRoutingTable.addIndexShard(new IndexShardRoutingTable.Builder(relocatingShard.shardId()).addShard(relocatingShard).addShard(replica).build()); + } else { + ShardRouting primaryShard = TestShardRouting.newShardRouting("test", 2, "node3", 1, true, ShardRoutingState.STARTED, 1); + relocatingShard = TestShardRouting.newShardRouting("test", 2, "node1", "node2", 1, false, ShardRoutingState.RELOCATING, 1); + indexRoutingTable.addIndexShard(new IndexShardRoutingTable.Builder(relocatingShard.shardId()) + .addShard(primaryShard).addShard(relocatingShard).build()); + } + + stateBuilder.routingTable(RoutingTable.builder().add(indexRoutingTable).build()); ClusterState state = stateBuilder.build(); logger.info("--> test starting of shard"); RoutingAllocation.Result result = allocation.applyStartedShards(state, Arrays.asList( - TestShardRouting.newShardRouting(initShard.index(), initShard.id(), initShard.currentNodeId(), initShard.relocatingNodeId(), initShard.primary(), + TestShardRouting.newShardRouting(initShard.index(), initShard.id(), initShard.currentNodeId(), initShard.relocatingNodeId(), + initShard.primaryTerm(), initShard.primary(), ShardRoutingState.INITIALIZING, initShard.allocationId(), randomInt())), false); assertTrue("failed to start " + initShard + "\ncurrent routing table:" + result.routingTable().prettyPrint(), result.changed()); - assertTrue(initShard + "isn't started \ncurrent routing table:" + result.routingTable().prettyPrint(), - 
result.routingTable().index("test").shard(initShard.id()).allShardsStarted()); + final ShardRouting resultRouting = result.routingTable().index("test").shard(initShard.id()).activeShards() + .stream().filter(routing -> routing.isSameAllocation(initShard)).findFirst().get(); + assertThat(initShard + "isn't started \ncurrent routing table:" + result.routingTable().prettyPrint(), resultRouting, notNullValue()); logger.info("--> testing shard variants that shouldn't match the initializing shard"); result = allocation.applyStartedShards(state, Arrays.asList( - TestShardRouting.newShardRouting(initShard.index(), initShard.id(), initShard.currentNodeId(), initShard.relocatingNodeId(), initShard.primary(), + TestShardRouting.newShardRouting(initShard.index(), initShard.id(), initShard.currentNodeId(), initShard.relocatingNodeId(), + initShard.primaryTerm(), initShard.primary(), ShardRoutingState.INITIALIZING, 1)), false); assertFalse("wrong allocation id flag shouldn't start shard " + initShard + "\ncurrent routing table:" + result.routingTable().prettyPrint(), result.changed()); result = allocation.applyStartedShards(state, Arrays.asList( - TestShardRouting.newShardRouting(initShard.index(), initShard.id(), "some_node", initShard.currentNodeId(), initShard.primary(), + TestShardRouting.newShardRouting(initShard.index(), initShard.id(), "some_node", initShard.currentNodeId(), + initShard.primaryTerm(), initShard.primary(), ShardRoutingState.INITIALIZING, AllocationId.newTargetRelocation(AllocationId.newRelocation(initShard.allocationId())) , 1)), false); assertFalse("relocating shard from node shouldn't start shard " + initShard + "\ncurrent routing table:" + result.routingTable().prettyPrint(), result.changed()); - logger.info("--> testing double starting"); result = allocation.applyStartedShards(state, Arrays.asList( - TestShardRouting.newShardRouting(startedShard.index(), startedShard.id(), startedShard.currentNodeId(), startedShard.relocatingNodeId(), 
startedShard.primary(), + TestShardRouting.newShardRouting(startedShard.index(), startedShard.id(), startedShard.currentNodeId(), startedShard.relocatingNodeId(), + startedShard.primaryTerm(), startedShard.primary(), ShardRoutingState.INITIALIZING, startedShard.allocationId(), 1)), false); assertFalse("duplicate starting of the same shard should be ignored \ncurrent routing table:" + result.routingTable().prettyPrint(), result.changed()); logger.info("--> testing starting of relocating shards"); final AllocationId targetAllocationId = AllocationId.newTargetRelocation(relocatingShard.allocationId()); result = allocation.applyStartedShards(state, Arrays.asList( - TestShardRouting.newShardRouting(relocatingShard.index(), relocatingShard.id(), relocatingShard.relocatingNodeId(), relocatingShard.currentNodeId(), relocatingShard.primary(), + TestShardRouting.newShardRouting(relocatingShard.index(), relocatingShard.id(), relocatingShard.relocatingNodeId(), + relocatingShard.currentNodeId(), relocatingShard.primaryTerm(), relocatingShard.primary(), ShardRoutingState.INITIALIZING, targetAllocationId, randomInt())), false); assertTrue("failed to start " + relocatingShard + "\ncurrent routing table:" + result.routingTable().prettyPrint(), result.changed()); @@ -108,12 +136,14 @@ public void testStartedShardsMatching() { logger.info("--> testing shard variants that shouldn't match the initializing relocating shard"); result = allocation.applyStartedShards(state, Arrays.asList( - TestShardRouting.newShardRouting(relocatingShard.index(), relocatingShard.id(), relocatingShard.relocatingNodeId(), relocatingShard.currentNodeId(), relocatingShard.primary(), + TestShardRouting.newShardRouting(relocatingShard.index(), relocatingShard.id(), relocatingShard.relocatingNodeId(), + relocatingShard.currentNodeId(), relocatingShard.primaryTerm(), relocatingShard.primary(), ShardRoutingState.INITIALIZING, relocatingShard.version()))); assertFalse("wrong allocation id shouldn't start shard" + 
relocatingShard + "\ncurrent routing table:" + result.routingTable().prettyPrint(), result.changed()); result = allocation.applyStartedShards(state, Arrays.asList( - TestShardRouting.newShardRouting(relocatingShard.index(), relocatingShard.id(), relocatingShard.relocatingNodeId(), relocatingShard.currentNodeId(), relocatingShard.primary(), + TestShardRouting.newShardRouting(relocatingShard.index(), relocatingShard.id(), relocatingShard.relocatingNodeId(), + relocatingShard.currentNodeId(), relocatingShard.primaryTerm(), relocatingShard.primary(), ShardRoutingState.INITIALIZING, relocatingShard.allocationId(), randomInt())), false); assertFalse("wrong allocation id shouldn't start shard even if relocatingId==shard.id" + relocatingShard + "\ncurrent routing table:" + result.routingTable().prettyPrint(), result.changed()); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java index 2f9104a570114..4597111158875 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java @@ -29,14 +29,7 @@ import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; -import org.elasticsearch.cluster.routing.IndexRoutingTable; -import org.elasticsearch.cluster.routing.IndexShardRoutingTable; -import org.elasticsearch.cluster.routing.RoutingNode; -import org.elasticsearch.cluster.routing.RoutingNodes; -import org.elasticsearch.cluster.routing.RoutingTable; -import org.elasticsearch.cluster.routing.ShardRouting; -import org.elasticsearch.cluster.routing.ShardRoutingState; -import org.elasticsearch.cluster.routing.TestShardRouting; +import 
org.elasticsearch.cluster.routing.*; import org.elasticsearch.cluster.routing.allocation.AllocationService; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.cluster.routing.allocation.allocator.ShardsAllocators; @@ -55,14 +48,9 @@ import java.util.HashSet; import java.util.Map; -import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING; -import static org.elasticsearch.cluster.routing.ShardRoutingState.RELOCATING; -import static org.elasticsearch.cluster.routing.ShardRoutingState.STARTED; -import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED; +import static org.elasticsearch.cluster.routing.ShardRoutingState.*; import static org.elasticsearch.common.settings.Settings.settingsBuilder; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.is; -import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.*; public class DiskThresholdDeciderTests extends ESAllocationTestCase { @@ -124,8 +112,8 @@ public void addListener(Listener listener) { logger.info("--> adding two nodes"); clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder() - .put(newNode("node1")) - .put(newNode("node2")) + .put(newNode("node1")) + .put(newNode("node2")) ).build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); @@ -155,7 +143,7 @@ public void addListener(Listener listener) { logger.info("--> adding node3"); clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()) - .put(newNode("node3")) + .put(newNode("node3")) ).build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); @@ -242,7 +230,7 @@ public void addListener(Listener listener) { logger.info("--> adding node4"); clusterState = 
ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()) - .put(newNode("node4")) + .put(newNode("node4")) ).build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); @@ -390,7 +378,7 @@ public void addListener(Listener listener) { logger.info("--> adding node3"); clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()) - .put(newNode("node3")) + .put(newNode("node3")) ).build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); @@ -477,7 +465,7 @@ public void addListener(Listener listener) { logger.info("--> adding node4"); clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()) - .put(newNode("node4")) + .put(newNode("node4")) ).build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); @@ -844,8 +832,8 @@ public void testCanRemainWithShardRelocatingAway() { .build(); // Two shards consuming each 80% of disk space while 70% is allowed, so shard 0 isn't allowed here - ShardRouting firstRouting = TestShardRouting.newShardRouting("test", 0, "node1", null, null, true, ShardRoutingState.STARTED, 1); - ShardRouting secondRouting = TestShardRouting.newShardRouting("test", 1, "node1", null, null, true, ShardRoutingState.STARTED, 1); + ShardRouting firstRouting = TestShardRouting.newShardRouting("test", 0, "node1", null, null, 1, true, ShardRoutingState.STARTED, 1); + ShardRouting secondRouting = TestShardRouting.newShardRouting("test", 1, "node1", null, null, 1, true, ShardRoutingState.STARTED, 1); RoutingNode firstRoutingNode = new RoutingNode("node1", discoveryNode1, Arrays.asList(firstRouting, secondRouting)); RoutingTable.Builder builder = 
RoutingTable.builder().add( IndexRoutingTable.builder("test") @@ -864,8 +852,8 @@ public void testCanRemainWithShardRelocatingAway() { assertThat(decision.type(), equalTo(Decision.Type.NO)); // Two shards consuming each 80% of disk space while 70% is allowed, but one is relocating, so shard 0 can stay - firstRouting = TestShardRouting.newShardRouting("test", 0, "node1", null, null, true, ShardRoutingState.STARTED, 1); - secondRouting = TestShardRouting.newShardRouting("test", 1, "node1", "node2", null, true, ShardRoutingState.RELOCATING, 1); + firstRouting = TestShardRouting.newShardRouting("test", 0, "node1", null, null, 1, true, ShardRoutingState.STARTED, 1); + secondRouting = TestShardRouting.newShardRouting("test", 1, "node1", "node2", null, 1, true, ShardRoutingState.RELOCATING, 1); firstRoutingNode = new RoutingNode("node1", discoveryNode1, Arrays.asList(firstRouting, secondRouting)); builder = RoutingTable.builder().add( IndexRoutingTable.builder("test") @@ -897,7 +885,7 @@ public void addListener(Listener listener) { } }; AllocationDeciders deciders = new AllocationDeciders(Settings.EMPTY, new HashSet<>(Arrays.asList( - new SameShardAllocationDecider(Settings.EMPTY), diskThresholdDecider + new SameShardAllocationDecider(Settings.EMPTY), diskThresholdDecider ))); AllocationService strategy = new AllocationService(settingsBuilder() .put("cluster.routing.allocation.concurrent_recoveries", 10) @@ -962,8 +950,8 @@ public void testForSingleDataNode() { .build(); // Two shards consumes 80% of disk space in data node, but we have only one data node, shards should remain. 
- ShardRouting firstRouting = TestShardRouting.newShardRouting("test", 0, "node2", null, null, true, ShardRoutingState.STARTED, 1); - ShardRouting secondRouting = TestShardRouting.newShardRouting("test", 1, "node2", null, null, true, ShardRoutingState.STARTED, 1); + ShardRouting firstRouting = TestShardRouting.newShardRouting("test", 0, "node2", null, null, 1, true, ShardRoutingState.STARTED, 1); + ShardRouting secondRouting = TestShardRouting.newShardRouting("test", 1, "node2", null, null, 1, true, ShardRoutingState.STARTED, 1); RoutingNode firstRoutingNode = new RoutingNode("node2", discoveryNode2, Arrays.asList(firstRouting, secondRouting)); RoutingTable.Builder builder = RoutingTable.builder().add( @@ -1019,8 +1007,8 @@ public void addListener(Listener listener) { ClusterState updateClusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()) .put(discoveryNode3)).build(); - firstRouting = TestShardRouting.newShardRouting("test", 0, "node2", null, null, true, ShardRoutingState.STARTED, 1); - secondRouting = TestShardRouting.newShardRouting("test", 1, "node2", "node3", null, true, ShardRoutingState.RELOCATING, 1); + firstRouting = TestShardRouting.newShardRouting("test", 0, "node2", null, null, 1, true, ShardRoutingState.STARTED, 1); + secondRouting = TestShardRouting.newShardRouting("test", 1, "node2", "node3", null, 1, true, ShardRoutingState.RELOCATING, 1); firstRoutingNode = new RoutingNode("node2", discoveryNode2, Arrays.asList(firstRouting, secondRouting)); builder = RoutingTable.builder().add( IndexRoutingTable.builder("test") diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java index fa31d8306e52b..0f680cb596e76 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java +++ 
b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java @@ -20,21 +20,13 @@ package org.elasticsearch.cluster.routing.allocation.decider; import org.elasticsearch.Version; -import org.elasticsearch.cluster.ClusterInfo; -import org.elasticsearch.cluster.ClusterInfoService; -import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.DiskUsage; -import org.elasticsearch.cluster.EmptyClusterInfoService; +import org.elasticsearch.cluster.*; import org.elasticsearch.cluster.MockInternalClusterInfoService.DevNullClusterInfo; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; -import org.elasticsearch.cluster.routing.RoutingNode; -import org.elasticsearch.cluster.routing.RoutingTable; -import org.elasticsearch.cluster.routing.ShardRouting; -import org.elasticsearch.cluster.routing.ShardRoutingHelper; -import org.elasticsearch.cluster.routing.UnassignedInfo; +import org.elasticsearch.cluster.routing.*; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.common.collect.ImmutableOpenMap; import org.elasticsearch.common.settings.Settings; @@ -98,7 +90,7 @@ public void testCanAllocateUsesMaxAvailableSpace() { ClusterInfoService cis = EmptyClusterInfoService.INSTANCE; DiskThresholdDecider decider = new DiskThresholdDecider(Settings.EMPTY, nss, cis, null); - ShardRouting test_0 = ShardRouting.newUnassigned("test", 0, null, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); + ShardRouting test_0 = ShardRouting.newUnassigned("test", 0, null, 1, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); DiscoveryNode node_0 = new DiscoveryNode("node_0", DummyTransportAddress.INSTANCE, Version.CURRENT); DiscoveryNode node_1 = new DiscoveryNode("node_1", 
DummyTransportAddress.INSTANCE, Version.CURRENT); @@ -143,22 +135,22 @@ public void testCanRemainUsesLeastAvailableSpace() { DiscoveryNode node_0 = new DiscoveryNode("node_0", DummyTransportAddress.INSTANCE, Version.CURRENT); DiscoveryNode node_1 = new DiscoveryNode("node_1", DummyTransportAddress.INSTANCE, Version.CURRENT); - ShardRouting test_0 = ShardRouting.newUnassigned("test", 0, null, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); + ShardRouting test_0 = ShardRouting.newUnassigned("test", 0, null, 1, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); ShardRoutingHelper.initialize(test_0, node_0.getId()); ShardRoutingHelper.moveToStarted(test_0); shardRoutingMap.put(test_0, "/node0/least"); - ShardRouting test_1 = ShardRouting.newUnassigned("test", 1, null, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); + ShardRouting test_1 = ShardRouting.newUnassigned("test", 1, null, 1, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); ShardRoutingHelper.initialize(test_1, node_1.getId()); ShardRoutingHelper.moveToStarted(test_1); shardRoutingMap.put(test_1, "/node1/least"); - ShardRouting test_2 = ShardRouting.newUnassigned("test", 2, null, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); + ShardRouting test_2 = ShardRouting.newUnassigned("test", 2, null, 1, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); ShardRoutingHelper.initialize(test_2, node_1.getId()); ShardRoutingHelper.moveToStarted(test_2); shardRoutingMap.put(test_2, "/node1/most"); - ShardRouting test_3 = ShardRouting.newUnassigned("test", 3, null, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); + ShardRouting test_3 = ShardRouting.newUnassigned("test", 3, null, 1, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); ShardRoutingHelper.initialize(test_3, node_1.getId()); ShardRoutingHelper.moveToStarted(test_3); // Intentionally not in 
the shardRoutingMap. We want to test what happens when we don't know where it is. @@ -223,17 +215,17 @@ public void testShardSizeAndRelocatingSize() { shardSizes.put("[test][2][r]", 1000L); shardSizes.put("[other][0][p]", 10000L); ClusterInfo info = new DevNullClusterInfo(ImmutableOpenMap.of(), ImmutableOpenMap.of(), shardSizes.build()); - ShardRouting test_0 = ShardRouting.newUnassigned("test", 0, null, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); + ShardRouting test_0 = ShardRouting.newUnassigned("test", 0, null, 1, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); ShardRoutingHelper.initialize(test_0, "node1"); ShardRoutingHelper.moveToStarted(test_0); ShardRoutingHelper.relocate(test_0, "node2"); - ShardRouting test_1 = ShardRouting.newUnassigned("test", 1, null, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); + ShardRouting test_1 = ShardRouting.newUnassigned("test", 1, null, 1, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); ShardRoutingHelper.initialize(test_1, "node2"); ShardRoutingHelper.moveToStarted(test_1); ShardRoutingHelper.relocate(test_1, "node1"); - ShardRouting test_2 = ShardRouting.newUnassigned("test", 2, null, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); + ShardRouting test_2 = ShardRouting.newUnassigned("test", 2, null, 1, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); ShardRoutingHelper.initialize(test_2, "node1"); ShardRoutingHelper.moveToStarted(test_2); @@ -247,13 +239,13 @@ public void testShardSizeAndRelocatingSize() { assertEquals(0l, DiskThresholdDecider.sizeOfRelocatingShards(node, info, true, "/dev/some/other/dev")); assertEquals(0l, DiskThresholdDecider.sizeOfRelocatingShards(node, info, true, "/dev/some/other/dev")); - ShardRouting test_3 = ShardRouting.newUnassigned("test", 3, null, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); + ShardRouting test_3 = 
ShardRouting.newUnassigned("test", 3, null, 1, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); ShardRoutingHelper.initialize(test_3, "node1"); ShardRoutingHelper.moveToStarted(test_3); assertEquals(0l, DiskThresholdDecider.getShardSize(test_3, info)); - ShardRouting other_0 = ShardRouting.newUnassigned("other", 0, null, randomBoolean(), new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); + ShardRouting other_0 = ShardRouting.newUnassigned("other", 0, null, 1, randomBoolean(), new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); ShardRoutingHelper.initialize(other_0, "node2"); ShardRoutingHelper.moveToStarted(other_0); ShardRoutingHelper.relocate(other_0, "node1"); diff --git a/core/src/test/java/org/elasticsearch/discovery/zen/NodeJoinControllerTests.java b/core/src/test/java/org/elasticsearch/discovery/zen/NodeJoinControllerTests.java index 70c9430b53125..1a2bf630fecc7 100644 --- a/core/src/test/java/org/elasticsearch/discovery/zen/NodeJoinControllerTests.java +++ b/core/src/test/java/org/elasticsearch/discovery/zen/NodeJoinControllerTests.java @@ -487,17 +487,17 @@ public NoopAllocationService(Settings settings) { @Override public RoutingAllocation.Result applyStartedShards(ClusterState clusterState, List startedShards, boolean withReroute) { - return new RoutingAllocation.Result(false, clusterState.routingTable()); + return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData()); } @Override public RoutingAllocation.Result applyFailedShards(ClusterState clusterState, List failedShards) { - return new RoutingAllocation.Result(false, clusterState.routingTable()); + return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData()); } @Override public RoutingAllocation.Result reroute(ClusterState clusterState, boolean debug) { - return new RoutingAllocation.Result(false, clusterState.routingTable()); + return new RoutingAllocation.Result(false, 
clusterState.routingTable(), clusterState.metaData()); } } diff --git a/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java b/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java index 259821503408e..56c974581b753 100644 --- a/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java +++ b/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java @@ -40,6 +40,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.test.ESAllocationTestCase; +import org.hamcrest.Matcher; import org.junit.Before; import java.io.IOException; @@ -72,12 +73,12 @@ public void buildTestAllocator() { * and processes only the applicable shard. */ public void testNoProcessReplica() { - ShardRouting shard = TestShardRouting.newShardRouting("test", 0, null, null, null, false, ShardRoutingState.UNASSIGNED, 0, new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null)); + ShardRouting shard = TestShardRouting.newShardRouting("test", 0, null, null, null, 1, false, ShardRoutingState.UNASSIGNED, 0, new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null)); assertThat(testAllocator.needToFindPrimaryCopy(shard), equalTo(false)); } - public void testNoProcessPrimayNotAllcoatedBefore() { - ShardRouting shard = TestShardRouting.newShardRouting("test", 0, null, null, null, true, ShardRoutingState.UNASSIGNED, 0, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)); + public void testNoProcessPrimaryNotAllocatedBefore() { + ShardRouting shard = TestShardRouting.newShardRouting("test", 0, null, null, null, 1, true, ShardRoutingState.UNASSIGNED, 0, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)); assertThat(testAllocator.needToFindPrimaryCopy(shard), equalTo(false)); } @@ -87,9 +88,7 @@ public void testNoProcessPrimayNotAllcoatedBefore() { public void testNoAsyncFetchData() { RoutingAllocation allocation = 
routingAllocationWithOnePrimaryNoReplicas(yesAllocationDeciders()); boolean changed = testAllocator.allocateUnassigned(allocation); - assertThat(changed, equalTo(false)); - assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); - assertThat(allocation.routingNodes().unassigned().ignored().get(0).shardId(), equalTo(shardId)); + assertNotAllocated(allocation, changed); } /** @@ -99,9 +98,7 @@ public void testNoAllocationFound() { RoutingAllocation allocation = routingAllocationWithOnePrimaryNoReplicas(yesAllocationDeciders()); testAllocator.addData(node1, -1); boolean changed = testAllocator.allocateUnassigned(allocation); - assertThat(changed, equalTo(false)); - assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); - assertThat(allocation.routingNodes().unassigned().ignored().get(0).shardId(), equalTo(shardId)); + assertNotAllocated(allocation, changed); } /** @@ -111,9 +108,14 @@ public void testStoreException() { RoutingAllocation allocation = routingAllocationWithOnePrimaryNoReplicas(yesAllocationDeciders()); testAllocator.addData(node1, 3, new CorruptIndexException("test", "test")); boolean changed = testAllocator.allocateUnassigned(allocation); + assertNotAllocated(allocation, changed); + } + + protected void assertNotAllocated(RoutingAllocation allocation, boolean changed) { assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); assertThat(allocation.routingNodes().unassigned().ignored().get(0).shardId(), equalTo(shardId)); + assertThat(allocation.routingNodes().unassigned().ignored().get(0).primaryTerm(), equalTo(0l)); } /** @@ -124,9 +126,18 @@ public void testFoundAllocationAndAllocating() { testAllocator.addData(node1, 10); boolean changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(true)); + assertShardAllocated(allocation, node1); + } + + protected void assertShardAllocated(RoutingAllocation allocation, 
DiscoveryNode... nodes) { assertThat(allocation.routingNodes().unassigned().ignored().isEmpty(), equalTo(true)); assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1)); - assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(), equalTo(node1.id())); + final Matcher[] nodeMatchers = new Matcher[nodes.length]; + for (int i = 0; i < nodes.length; i++) { + nodeMatchers[i] = equalTo(nodes[i].id()); + } + assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(), anyOf((Matcher[]) nodeMatchers)); + assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).primaryTerm(), equalTo(1L)); } /** @@ -137,9 +148,7 @@ public void testFoundAllocationButThrottlingDecider() { RoutingAllocation allocation = routingAllocationWithOnePrimaryNoReplicas(throttleAllocationDeciders()); testAllocator.addData(node1, 10); boolean changed = testAllocator.allocateUnassigned(allocation); - assertThat(changed, equalTo(false)); - assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); - assertThat(allocation.routingNodes().unassigned().ignored().get(0).shardId(), equalTo(shardId)); + assertNotAllocated(allocation, changed); } /** @@ -151,9 +160,7 @@ public void testFoundAllocationButNoDecider() { testAllocator.addData(node1, 10); boolean changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(true)); - assertThat(allocation.routingNodes().unassigned().ignored().isEmpty(), equalTo(true)); - assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1)); - assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(), equalTo(node1.id())); + assertShardAllocated(allocation, node1); } /** @@ -164,9 +171,7 @@ public void testAllocateToTheHighestVersion() { 
testAllocator.addData(node1, 10).addData(node2, 12); boolean changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(true)); - assertThat(allocation.routingNodes().unassigned().ignored().isEmpty(), equalTo(true)); - assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1)); - assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(), equalTo(node2.id())); + assertShardAllocated(allocation, node2); } /** @@ -227,10 +232,8 @@ public void testEnoughCopiesFoundForAllocation() { allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state.nodes(), null); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(true)); - assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(0)); assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas - assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1)); - assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(), anyOf(equalTo(node2.id()), equalTo(node1.id()))); + assertShardAllocated(allocation, node1, node2); } /** @@ -270,12 +273,11 @@ public void testEnoughCopiesFoundForAllocationWithDifferentVersion() { assertThat(changed, equalTo(true)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(0)); assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas - assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1)); - assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(), equalTo(node2.id())); + assertShardAllocated(allocation, node2); } public void testAllocationOnAnyNodeWithSharedFs() { - 
ShardRouting shard = TestShardRouting.newShardRouting("test", 0, null, null, null, false, + ShardRouting shard = TestShardRouting.newShardRouting("test", 0, null, null, null, 1, false, ShardRoutingState.UNASSIGNED, 0, new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null)); @@ -299,7 +301,7 @@ public void testAllocationOnAnyNodeWithSharedFs() { } public void testAllocationOnAnyNodeShouldPutNodesWithExceptionsLast() { - ShardRouting shard = TestShardRouting.newShardRouting("test", 0, null, null, null, false, + ShardRouting shard = TestShardRouting.newShardRouting("test", 0, null, null, null, 1, false, ShardRoutingState.UNASSIGNED, 0, new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null)); diff --git a/core/src/test/java/org/elasticsearch/gateway/PriorityComparatorTests.java b/core/src/test/java/org/elasticsearch/gateway/PriorityComparatorTests.java index 88499bf96cd9e..9a114bd11d5fa 100644 --- a/core/src/test/java/org/elasticsearch/gateway/PriorityComparatorTests.java +++ b/core/src/test/java/org/elasticsearch/gateway/PriorityComparatorTests.java @@ -30,8 +30,9 @@ public class PriorityComparatorTests extends ESTestCase { public void testPreferNewIndices() { RoutingNodes.UnassignedShards shards = new RoutingNodes.UnassignedShards((RoutingNodes) null); List shardRoutings = Arrays.asList(TestShardRouting.newShardRouting("oldest", 0, null, null, null, - randomBoolean(), ShardRoutingState.UNASSIGNED, 0, new UnassignedInfo(randomFrom(UnassignedInfo.Reason.values()), "foobar")), TestShardRouting.newShardRouting("newest", 0, null, null, null, - randomBoolean(), ShardRoutingState.UNASSIGNED, 0, new UnassignedInfo(randomFrom(UnassignedInfo.Reason.values()), "foobar"))); + 1, randomBoolean(), ShardRoutingState.UNASSIGNED, 0, new UnassignedInfo(randomFrom(UnassignedInfo.Reason.values()), "foobar")), + TestShardRouting.newShardRouting("newest", 0, null, null, null, + 1, randomBoolean(), ShardRoutingState.UNASSIGNED, 0, new 
UnassignedInfo(randomFrom(UnassignedInfo.Reason.values()), "foobar"))); Collections.shuffle(shardRoutings, random()); for (ShardRouting routing : shardRoutings) { shards.add(routing); @@ -59,9 +60,10 @@ protected Settings getIndexSettings(String index) { public void testPreferPriorityIndices() { RoutingNodes.UnassignedShards shards = new RoutingNodes.UnassignedShards((RoutingNodes) null); - List shardRoutings = Arrays.asList(TestShardRouting.newShardRouting("oldest", 0, null, null, null, - randomBoolean(), ShardRoutingState.UNASSIGNED, 0, new UnassignedInfo(randomFrom(UnassignedInfo.Reason.values()), "foobar")), TestShardRouting.newShardRouting("newest", 0, null, null, null, - randomBoolean(), ShardRoutingState.UNASSIGNED, 0, new UnassignedInfo(randomFrom(UnassignedInfo.Reason.values()), "foobar"))); + List shardRoutings = Arrays.asList(TestShardRouting.newShardRouting("oldest", 0, null, null, null, 1, + randomBoolean(), ShardRoutingState.UNASSIGNED, 0, new UnassignedInfo(randomFrom(UnassignedInfo.Reason.values()), "foobar")), + TestShardRouting.newShardRouting("newest", 0, null, null, null, 1, + randomBoolean(), ShardRoutingState.UNASSIGNED, 0, new UnassignedInfo(randomFrom(UnassignedInfo.Reason.values()), "foobar"))); Collections.shuffle(shardRoutings, random()); for (ShardRouting routing : shardRoutings) { shards.add(routing); @@ -97,15 +99,16 @@ public void testPriorityComparatorSort() { if (frequently()) { indices[i] = new IndexMeta("idx_2015_04_" + String.format(Locale.ROOT, "%02d", i), randomIntBetween(1, 1000), randomIntBetween(1, 10000)); } else { // sometimes just use defaults - indices[i] = new IndexMeta("idx_2015_04_" + String.format(Locale.ROOT, "%02d", i)); + indices[i] = new IndexMeta("idx_2015_04_" + String.format(Locale.ROOT, "%02d", i)); } map.put(indices[i].name, indices[i]); } int numShards = randomIntBetween(10, 100); for (int i = 0; i < numShards; i++) { IndexMeta indexMeta = randomFrom(indices); - 
shards.add(TestShardRouting.newShardRouting(indexMeta.name, randomIntBetween(1, 5), null, null, null, - randomBoolean(), ShardRoutingState.UNASSIGNED, randomIntBetween(0, 100), new UnassignedInfo(randomFrom(UnassignedInfo.Reason.values()), "foobar"))); + shards.add(TestShardRouting.newShardRouting(indexMeta.name, randomIntBetween(1, 5), null, null, null, 1, + randomBoolean(), ShardRoutingState.UNASSIGNED, randomIntBetween(0, 100), + new UnassignedInfo(randomFrom(UnassignedInfo.Reason.values()), "foobar"))); } shards.sort(new PriorityComparator() { @Override @@ -128,7 +131,7 @@ protected Settings getIndexSettings(String index) { assertTrue("creationDate mismatch, expected:" + currentMeta.creationDate + " after " + prevMeta.creationDate, prevMeta.creationDate > currentMeta.creationDate); } } else { - assertTrue("priority mismatch, expected:" + currentMeta.priority + " after " + prevMeta.priority, prevMeta.priority > currentMeta.priority); + assertTrue("priority mismatch, expected:" + currentMeta.priority + " after " + prevMeta.priority, prevMeta.priority > currentMeta.priority); } } previous = routing; diff --git a/core/src/test/java/org/elasticsearch/gateway/RecoveryFromGatewayIT.java b/core/src/test/java/org/elasticsearch/gateway/RecoveryFromGatewayIT.java index 5d1a0313066b4..fcdbbbcb248dd 100644 --- a/core/src/test/java/org/elasticsearch/gateway/RecoveryFromGatewayIT.java +++ b/core/src/test/java/org/elasticsearch/gateway/RecoveryFromGatewayIT.java @@ -19,11 +19,16 @@ package org.elasticsearch.gateway; +import com.carrotsearch.hppc.cursors.ObjectCursor; import org.elasticsearch.action.admin.indices.recovery.RecoveryResponse; import org.elasticsearch.action.admin.indices.stats.IndexStats; import org.elasticsearch.action.admin.indices.stats.ShardStats; import org.elasticsearch.client.Client; import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.routing.IndexRoutingTable; +import 
org.elasticsearch.cluster.routing.IndexShardRoutingTable; +import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider; import org.elasticsearch.common.settings.Settings; @@ -39,6 +44,10 @@ import org.elasticsearch.test.junit.annotations.TestLogging; import org.elasticsearch.test.store.MockFSDirectoryService; +import java.util.HashMap; +import java.util.Map; +import java.util.stream.IntStream; + import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; import static org.elasticsearch.common.settings.Settings.settingsBuilder; @@ -47,10 +56,7 @@ import static org.elasticsearch.index.query.QueryBuilders.termQuery; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.greaterThan; -import static org.hamcrest.Matchers.not; -import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.*; @ClusterScope(numDataNodes = 0, scope = Scope.TEST) public class RecoveryFromGatewayIT extends ESIntegTestCase { @@ -78,10 +84,13 @@ public void testOneNodeRecoverFromGateway() throws Exception { assertHitCount(client().prepareSearch().setSize(0).setQuery(termQuery("appAccountIds", 179)).execute().actionGet(), 2); ensureYellow("test"); // wait for primary allocations here otherwise if we have a lot of shards we might have a // shard that is still in post recovery when we restart and the ensureYellow() below will timeout + + Map primaryTerms = assertAndCapturePrimaryTerms(null); internalCluster().fullRestart(); logger.info("Running Cluster Health (wait for the shards to startup)"); 
ensureYellow(); + primaryTerms = assertAndCapturePrimaryTerms(primaryTerms); client().admin().indices().prepareRefresh().execute().actionGet(); assertHitCount(client().prepareSearch().setSize(0).setQuery(termQuery("appAccountIds", 179)).execute().actionGet(), 2); @@ -90,11 +99,45 @@ public void testOneNodeRecoverFromGateway() throws Exception { logger.info("Running Cluster Health (wait for the shards to startup)"); ensureYellow(); + primaryTerms = assertAndCapturePrimaryTerms(primaryTerms); client().admin().indices().prepareRefresh().execute().actionGet(); assertHitCount(client().prepareSearch().setSize(0).setQuery(termQuery("appAccountIds", 179)).execute().actionGet(), 2); } + private Map assertAndCapturePrimaryTerms(Map previousTerms) { + if (previousTerms == null) { + previousTerms = new HashMap<>(); + } + final Map result = new HashMap<>(); + final ClusterState state = client().admin().cluster().prepareState().get().getState(); + for (ObjectCursor cursor : state.metaData().indices().values()) { + final IndexMetaData indexMetaData = cursor.value; + final String index = indexMetaData.getIndex(); + final long[] previous = previousTerms.get(index); + final long[] current = IntStream.range(0, indexMetaData.getNumberOfShards()).mapToLong(indexMetaData::primaryTerm).toArray(); + if (previous == null) { + result.put(index, current); + } else { + assertThat("number of terms changed for index [" + index + "]", current.length, equalTo(previous.length)); + for (int shard = 0; shard < current.length; shard++) { + assertThat("primary term didn't increase for [" + index + "][" + shard + "]", current[shard], greaterThan(previous[shard])); + } + result.put(index, current); + } + } + + for (IndexRoutingTable indexRoutingTable : state.routingTable()) { + final long[] terms = result.get(indexRoutingTable.index()); + for (IndexShardRoutingTable shardRoutingTable : indexRoutingTable) { + for (ShardRouting routing : shardRoutingTable.shards()) { + assertThat("wrong primary term for " 
+ routing, routing.primaryTerm(), equalTo(terms[routing.shardId().id()])); + } + } + } + return result; + } + public void testSingleNodeNoFlush() throws Exception { internalCluster().startNode(); @@ -153,10 +196,14 @@ SETTING_NUMBER_OF_REPLICAS, randomIntBetween(0, 1) logger.info("Ensure all primaries have been started"); ensureYellow(); } + + Map primaryTerms = assertAndCapturePrimaryTerms(null); + internalCluster().fullRestart(); logger.info("Running Cluster Health (wait for the shards to startup)"); ensureYellow(); + primaryTerms = assertAndCapturePrimaryTerms(primaryTerms); for (int i = 0; i <= randomInt(10); i++) { assertHitCount(client().prepareSearch().setSize(0).setQuery(matchAllQuery()).get(), value1Docs + value2Docs); @@ -170,6 +217,7 @@ SETTING_NUMBER_OF_REPLICAS, randomIntBetween(0, 1) logger.info("Running Cluster Health (wait for the shards to startup)"); ensureYellow(); + primaryTerms = assertAndCapturePrimaryTerms(primaryTerms); for (int i = 0; i <= randomInt(10); i++) { assertHitCount(client().prepareSearch().setSize(0).setQuery(matchAllQuery()).get(), value1Docs + value2Docs); @@ -191,10 +239,13 @@ public void testSingleNodeWithFlush() throws Exception { ensureYellow("test"); // wait for primary allocations here otherwise if we have a lot of shards we might have a // shard that is still in post recovery when we restart and the ensureYellow() below will timeout + Map primaryTerms = assertAndCapturePrimaryTerms(null); + internalCluster().fullRestart(); logger.info("Running Cluster Health (wait for the shards to startup)"); ensureYellow(); + primaryTerms = assertAndCapturePrimaryTerms(primaryTerms); for (int i = 0; i < 10; i++) { assertHitCount(client().prepareSearch().setSize(0).setQuery(matchAllQuery()).execute().actionGet(), 2); @@ -204,6 +255,7 @@ public void testSingleNodeWithFlush() throws Exception { logger.info("Running Cluster Health (wait for the shards to startup)"); ensureYellow(); + primaryTerms = 
assertAndCapturePrimaryTerms(primaryTerms); for (int i = 0; i < 10; i++) { assertHitCount(client().prepareSearch().setSize(0).setQuery(matchAllQuery()).execute().actionGet(), 2); @@ -226,6 +278,8 @@ public void testTwoNodeFirstNodeCleared() throws Exception { assertHitCount(client().prepareSearch().setSize(0).setQuery(matchAllQuery()).execute().actionGet(), 2); } + Map primaryTerms = assertAndCapturePrimaryTerms(null); + internalCluster().fullRestart(new RestartCallback() { @Override public Settings onNodeStopped(String nodeName) throws Exception { @@ -241,6 +295,7 @@ public boolean clearData(String nodeName) { logger.info("Running Cluster Health (wait for the shards to startup)"); ensureGreen(); + primaryTerms = assertAndCapturePrimaryTerms(primaryTerms); for (int i = 0; i < 10; i++) { assertHitCount(client().prepareSearch().setSize(0).setQuery(matchAllQuery()).execute().actionGet(), 2); @@ -266,6 +321,8 @@ public void testLatestVersionLoaded() throws Exception { String metaDataUuid = client().admin().cluster().prepareState().execute().get().getState().getMetaData().clusterUUID(); assertThat(metaDataUuid, not(equalTo("_na_"))); + Map primaryTerms = assertAndCapturePrimaryTerms(null); + logger.info("--> closing first node, and indexing more data to the second node"); internalCluster().fullRestart(new RestartCallback() { @@ -305,6 +362,7 @@ public void doAfterNodes(int numNodes, Client client) throws Exception { logger.info("--> running cluster_health (wait for the shards to startup)"); ensureGreen(); + primaryTerms = assertAndCapturePrimaryTerms(primaryTerms); assertThat(client().admin().cluster().prepareState().execute().get().getState().getMetaData().clusterUUID(), equalTo(metaDataUuid)); @@ -378,11 +436,15 @@ public void testReusePeerRecovery() throws Exception { .setTransientSettings(settingsBuilder() .put(EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE, EnableAllocationDecider.Allocation.NONE)) .get(); + + Map primaryTerms = 
assertAndCapturePrimaryTerms(null); + logger.info("--> full cluster restart"); internalCluster().fullRestart(); logger.info("--> waiting for cluster to return to green after {}shutdown", useSyncIds ? "" : "second "); ensureGreen(); + primaryTerms = assertAndCapturePrimaryTerms(primaryTerms); if (useSyncIds) { assertSyncIdsNotNull(); @@ -437,6 +499,8 @@ public void testRecoveryDifferentNodeOrderStartup() throws Exception { internalCluster().startNode(settingsBuilder().put("path.data", createTempDir()).build()); ensureGreen(); + Map primaryTerms = assertAndCapturePrimaryTerms(null); + internalCluster().fullRestart(new RestartCallback() { @@ -447,6 +511,7 @@ public boolean doRestart(String nodeName) { }); ensureYellow(); + primaryTerms = assertAndCapturePrimaryTerms(primaryTerms); assertThat(client().admin().indices().prepareExists("test").execute().actionGet().isExists(), equalTo(true)); assertHitCount(client().prepareSearch("test").setSize(0).setQuery(QueryBuilders.matchAllQuery()).execute().actionGet(), 1); diff --git a/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java b/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java index 6a0aabe8d536f..e8a4902b6763c 100644 --- a/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java +++ b/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java @@ -287,8 +287,8 @@ private RoutingAllocation onePrimaryOnNode1And1Replica(AllocationDeciders decide RoutingTable routingTable = RoutingTable.builder() .add(IndexRoutingTable.builder(shardId.getIndex()) .addIndexShard(new IndexShardRoutingTable.Builder(shardId) - .addShard(TestShardRouting.newShardRouting(shardId.getIndex(), shardId.getId(), node1.id(), true, ShardRoutingState.STARTED, 10)) - .addShard(ShardRouting.newUnassigned(shardId.getIndex(), shardId.getId(), null, false, new UnassignedInfo(reason, null))) + .addShard(TestShardRouting.newShardRouting(shardId.getIndex(), shardId.getId(), 
node1.id(), 1, true, ShardRoutingState.STARTED, 10)) + .addShard(ShardRouting.newUnassigned(shardId.getIndex(), shardId.getId(), null, 1, false, new UnassignedInfo(reason, null))) .build()) ) .build(); @@ -306,8 +306,8 @@ private RoutingAllocation onePrimaryOnNode1And1ReplicaRecovering(AllocationDecid RoutingTable routingTable = RoutingTable.builder() .add(IndexRoutingTable.builder(shardId.getIndex()) .addIndexShard(new IndexShardRoutingTable.Builder(shardId) - .addShard(TestShardRouting.newShardRouting(shardId.getIndex(), shardId.getId(), node1.id(), true, ShardRoutingState.STARTED, 10)) - .addShard(TestShardRouting.newShardRouting(shardId.getIndex(), shardId.getId(), node2.id(), null, null, false, ShardRoutingState.INITIALIZING, 10, new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null))) + .addShard(TestShardRouting.newShardRouting(shardId.getIndex(), shardId.getId(), node1.id(), 1, true, ShardRoutingState.STARTED, 10)) + .addShard(TestShardRouting.newShardRouting(shardId.getIndex(), shardId.getId(), node2.id(), null, null, 1, false, ShardRoutingState.INITIALIZING, 10, new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null))) .build()) ) .build(); diff --git a/core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java index c73420aaa4461..83b34b3b7a493 100644 --- a/core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java +++ b/core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java @@ -225,8 +225,10 @@ public void testPersistenceStateMetadataPersistence() throws Exception { assertEquals(shardStateMetaData, getShardStateMetadata(shard)); assertEquals(shardStateMetaData, new ShardStateMetaData(routing.version(), routing.primary(), shard.indexSettings.get(IndexMetaData.SETTING_INDEX_UUID))); - // test if we still write it even if the shard is not active - ShardRouting inactiveRouting = 
TestShardRouting.newShardRouting(shard.shardRouting.index(), shard.shardRouting.shardId().id(), shard.shardRouting.currentNodeId(), null, null, true, ShardRoutingState.INITIALIZING, shard.shardRouting.version() + 1); + // test if we don't write it if the shard is not active + ShardRouting inactiveRouting = TestShardRouting.newShardRouting(shard.shardRouting.index(), shard.shardRouting.shardId().id(), + shard.shardRouting.currentNodeId(), null, null, shard.shardRouting.primaryTerm(), true, ShardRoutingState.INITIALIZING, + shard.shardRouting.version() + 1); shard.persistMetadata(inactiveRouting, shard.shardRouting); shardStateMetaData = load(logger, env.availableShardPaths(shard.shardId)); assertEquals("inactive shard state shouldn't be persisted", shardStateMetaData, getShardStateMetadata(shard)); @@ -264,7 +266,9 @@ public void testDeleteShardState() throws IOException { ShardStateMetaData shardStateMetaData = load(logger, env.availableShardPaths(shard.shardId)); assertEquals(shardStateMetaData, getShardStateMetadata(shard)); - routing = TestShardRouting.newShardRouting(shard.shardId.index().getName(), shard.shardId.id(), routing.currentNodeId(), null, routing.primary(), ShardRoutingState.INITIALIZING, shard.shardRouting.allocationId(), shard.shardRouting.version() + 1); + routing = TestShardRouting.newShardRouting(shard.shardId.index().getName(), shard.shardId.id(), routing.currentNodeId(), null, + shard.shardRouting.primaryTerm(), routing.primary(), ShardRoutingState.INITIALIZING, shard.shardRouting.allocationId(), + shard.shardRouting.version() + 1); shard.updateRoutingEntry(routing, true); shard.deleteShardState(); diff --git a/core/src/test/java/org/elasticsearch/indices/flush/SyncedFlushUnitTests.java b/core/src/test/java/org/elasticsearch/indices/flush/SyncedFlushUnitTests.java index 19cce93c6e4e6..8fad646d8b828 100644 --- a/core/src/test/java/org/elasticsearch/indices/flush/SyncedFlushUnitTests.java +++ 
b/core/src/test/java/org/elasticsearch/indices/flush/SyncedFlushUnitTests.java @@ -108,7 +108,7 @@ protected TestPlan createTestPlan() { Map shardResponses = new HashMap<>(); for (int copy = 0; copy < replicas + 1; copy++) { final ShardRouting shardRouting = TestShardRouting.newShardRouting(index, shard, "node_" + shardId + "_" + copy, null, - copy == 0, ShardRoutingState.STARTED, 0); + 1, copy == 0, ShardRoutingState.STARTED, 0); if (randomInt(5) < 2) { // shard copy failure failed++; diff --git a/core/src/test/java/org/elasticsearch/indices/store/IndicesStoreIntegrationIT.java b/core/src/test/java/org/elasticsearch/indices/store/IndicesStoreIntegrationIT.java index b56e3ad36473e..26de4a994e295 100644 --- a/core/src/test/java/org/elasticsearch/indices/store/IndicesStoreIntegrationIT.java +++ b/core/src/test/java/org/elasticsearch/indices/store/IndicesStoreIntegrationIT.java @@ -415,7 +415,7 @@ public ClusterState execute(ClusterState currentState) throws Exception { for (int i = 0; i < numShards; i++) { indexRoutingTableBuilder.addIndexShard( new IndexShardRoutingTable.Builder(new ShardId("test", i)) - .addShard(TestShardRouting.newShardRouting("test", i, masterId, true, ShardRoutingState.STARTED, shardVersions[shardIds[i]])) + .addShard(TestShardRouting.newShardRouting("test", i, masterId, 1, true, ShardRoutingState.STARTED, shardVersions[shardIds[i]])) .build() ); } diff --git a/core/src/test/java/org/elasticsearch/indices/store/IndicesStoreTests.java b/core/src/test/java/org/elasticsearch/indices/store/IndicesStoreTests.java index ec6a3b3849151..b4b582f1e1022 100644 --- a/core/src/test/java/org/elasticsearch/indices/store/IndicesStoreTests.java +++ b/core/src/test/java/org/elasticsearch/indices/store/IndicesStoreTests.java @@ -81,10 +81,10 @@ public void testShardCanBeDeletedNoShardStarted() throws Exception { ClusterState.Builder clusterState = ClusterState.builder(new ClusterName("test")); 
clusterState.metaData(MetaData.builder().put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(numShards).numberOfReplicas(numReplicas))); IndexShardRoutingTable.Builder routingTable = new IndexShardRoutingTable.Builder(new ShardId("test", 1)); - + final int primaryTerm = randomInt(200); for (int i = 0; i < numShards; i++) { int unStartedShard = randomInt(numReplicas); - for (int j=0; j <= numReplicas; j++) { + for (int j = 0; j <= numReplicas; j++) { ShardRoutingState state; if (j == unStartedShard) { state = randomFrom(NOT_STARTED_STATES); @@ -95,7 +95,7 @@ public void testShardCanBeDeletedNoShardStarted() throws Exception { if (state == ShardRoutingState.UNASSIGNED) { unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null); } - routingTable.addShard(TestShardRouting.newShardRouting("test", i, "xyz", null, null, j == 0, state, 0, unassignedInfo)); + routingTable.addShard(TestShardRouting.newShardRouting("test", i, "xyz", null, null, primaryTerm, j == 0, state, 0, unassignedInfo)); } } assertFalse(indicesStore.shardCanBeDeleted(clusterState.build(), routingTable.build())); @@ -110,12 +110,13 @@ public void testShardCanBeDeletedShardExistsLocally() throws Exception { clusterState.nodes(DiscoveryNodes.builder().localNodeId(localNode.id()).put(localNode).put(new DiscoveryNode("xyz", new LocalTransportAddress("xyz"), Version.CURRENT))); IndexShardRoutingTable.Builder routingTable = new IndexShardRoutingTable.Builder(new ShardId("test", 1)); int localShardId = randomInt(numShards - 1); + final int primaryTerm = randomInt(200); for (int i = 0; i < numShards; i++) { String nodeId = i == localShardId ? localNode.getId() : randomBoolean() ? "abc" : "xyz"; String relocationNodeId = randomBoolean() ? null : randomBoolean() ? 
localNode.getId() : "xyz"; - routingTable.addShard(TestShardRouting.newShardRouting("test", i, nodeId, relocationNodeId, true, ShardRoutingState.STARTED, 0)); + routingTable.addShard(TestShardRouting.newShardRouting("test", i, nodeId, relocationNodeId, primaryTerm, true, ShardRoutingState.STARTED, 0)); for (int j = 0; j < numReplicas; j++) { - routingTable.addShard(TestShardRouting.newShardRouting("test", i, nodeId, relocationNodeId, false, ShardRoutingState.STARTED, 0)); + routingTable.addShard(TestShardRouting.newShardRouting("test", i, nodeId, relocationNodeId, primaryTerm, false, ShardRoutingState.STARTED, 0)); } } @@ -131,11 +132,12 @@ public void testShardCanBeDeletedNodeNotInList() throws Exception { clusterState.metaData(MetaData.builder().put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(numShards).numberOfReplicas(numReplicas))); clusterState.nodes(DiscoveryNodes.builder().localNodeId(localNode.id()).put(localNode)); IndexShardRoutingTable.Builder routingTable = new IndexShardRoutingTable.Builder(new ShardId("test", 1)); + final int primaryTerm = randomInt(200); for (int i = 0; i < numShards; i++) { String relocatingNodeId = randomBoolean() ? 
null : "def"; - routingTable.addShard(TestShardRouting.newShardRouting("test", i, "xyz", relocatingNodeId, true, ShardRoutingState.STARTED, 0)); + routingTable.addShard(TestShardRouting.newShardRouting("test", i, "xyz", relocatingNodeId, primaryTerm, true, ShardRoutingState.STARTED, 0)); for (int j = 0; j < numReplicas; j++) { - routingTable.addShard(TestShardRouting.newShardRouting("test", i, "xyz", relocatingNodeId, false, ShardRoutingState.STARTED, 0)); + routingTable.addShard(TestShardRouting.newShardRouting("test", i, "xyz", relocatingNodeId, primaryTerm, false, ShardRoutingState.STARTED, 0)); } } @@ -153,10 +155,11 @@ public void testShardCanBeDeletedNodeVersion() throws Exception { clusterState.metaData(MetaData.builder().put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(numShards).numberOfReplicas(numReplicas))); clusterState.nodes(DiscoveryNodes.builder().localNodeId(localNode.id()).put(localNode).put(new DiscoveryNode("xyz", new LocalTransportAddress("xyz"), nodeVersion))); IndexShardRoutingTable.Builder routingTable = new IndexShardRoutingTable.Builder(new ShardId("test", 1)); + final int primaryTerm = randomInt(200); for (int i = 0; i < numShards; i++) { - routingTable.addShard(TestShardRouting.newShardRouting("test", i, "xyz", null, true, ShardRoutingState.STARTED, 0)); + routingTable.addShard(TestShardRouting.newShardRouting("test", i, "xyz", null, primaryTerm, true, ShardRoutingState.STARTED, 0)); for (int j = 0; j < numReplicas; j++) { - routingTable.addShard(TestShardRouting.newShardRouting("test", i, "xyz", null, false, ShardRoutingState.STARTED, 0)); + routingTable.addShard(TestShardRouting.newShardRouting("test", i, "xyz", null, primaryTerm, false, ShardRoutingState.STARTED, 0)); } } @@ -171,7 +174,7 @@ public void testShardCanBeDeletedRelocatingNode() throws Exception { ClusterState.Builder clusterState = ClusterState.builder(new ClusterName("test")); 
clusterState.metaData(MetaData.builder().put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(numShards).numberOfReplicas(numReplicas))); final Version nodeVersion = randomBoolean() ? CURRENT : randomVersion(random()); - + final int primaryTerm = randomInt(200); clusterState.nodes(DiscoveryNodes.builder().localNodeId(localNode.id()) .put(localNode) .put(new DiscoveryNode("xyz", new LocalTransportAddress("xyz"), Version.CURRENT)) @@ -179,9 +182,9 @@ public void testShardCanBeDeletedRelocatingNode() throws Exception { )); IndexShardRoutingTable.Builder routingTable = new IndexShardRoutingTable.Builder(new ShardId("test", 1)); for (int i = 0; i < numShards; i++) { - routingTable.addShard(TestShardRouting.newShardRouting("test", i, "xyz", "def", true, ShardRoutingState.STARTED, 0)); + routingTable.addShard(TestShardRouting.newShardRouting("test", i, "xyz", "def", primaryTerm, true, ShardRoutingState.STARTED, 0)); for (int j = 0; j < numReplicas; j++) { - routingTable.addShard(TestShardRouting.newShardRouting("test", i, "xyz", "def", false, ShardRoutingState.STARTED, 0)); + routingTable.addShard(TestShardRouting.newShardRouting("test", i, "xyz", "def", primaryTerm, false, ShardRoutingState.STARTED, 0)); } } From 5fb0f9a88ff40b3bc0f9fe81f181dd90ea76c6ea Mon Sep 17 00:00:00 2001 From: Boaz Leskes Date: Tue, 3 Nov 2015 13:43:48 +0000 Subject: [PATCH 02/12] Add Sequence Numbers and enforce Primary Terms Adds a counter to each write operation on a shard. This sequence numbers is indexed into lucene using doc values, for now (we will probably require indexing to support range searchers in the future). On top of this, primary term semantics are enforced and shards will refuse write operation coming from an older primary. Other notes: - The add SequenceServiceNumber is just a skeleton and will be replaced with much heavier one, once we have all the building blocks (i.e., checkpoints). 
- I completely ignored recovery - for this we will need checkpoints as well. - A new based class is introduced for all single doc write operations. This is handy to unify common logic (like toXContent). - For now, we don't use seq# as versioning. We could in the future. Relates to #10708 Closes #14651 --- .../action/DocWriteResponse.java | 143 ++++++++++++++++ ...Response.java => ReplicationResponse.java} | 9 +- .../indices/flush/TransportFlushAction.java | 8 +- .../flush/TransportShardFlushAction.java | 12 +- .../refresh/TransportRefreshAction.java | 8 +- .../refresh/TransportShardRefreshAction.java | 12 +- .../action/bulk/BulkItemResponse.java | 44 +---- .../action/bulk/BulkShardResponse.java | 4 +- .../action/bulk/TransportShardBulkAction.java | 46 ++--- .../action/delete/DeleteResponse.java | 65 ++------ .../action/delete/TransportDeleteAction.java | 22 ++- .../action/index/IndexResponse.java | 75 +++------ .../action/index/TransportIndexAction.java | 46 ++++- .../replication/ReplicationRequest.java | 33 ++++ .../TransportBroadcastReplicationAction.java | 19 +-- .../TransportReplicationAction.java | 82 +++------ .../action/update/TransportUpdateAction.java | 11 +- .../action/update/UpdateHelper.java | 11 +- .../action/update/UpdateResponse.java | 74 +++------ .../cluster/routing/ShardRouting.java | 9 + .../elasticsearch/index/engine/Engine.java | 47 ++++-- .../index/engine/EngineConfig.java | 22 ++- .../index/engine/InternalEngine.java | 20 +++ .../index/mapper/DocumentMapper.java | 14 +- .../index/mapper/DocumentParser.java | 5 +- .../index/mapper/ParseContext.java | 27 ++- .../index/mapper/ParsedDocument.java | 9 +- .../mapper/internal/SeqNoFieldMapper.java | 151 +++++++++++++++++ .../index/seqno/SequenceNumbersService.java | 60 +++++++ .../IllegalIndexShardStateException.java | 9 +- .../elasticsearch/index/shard/IndexShard.java | 157 +++++++++++------- .../shard/TranslogRecoveryPerformer.java | 7 +- .../index/translog/Translog.java | 68 ++++++-- 
.../rest/action/bulk/RestBulkAction.java | 54 ++---- .../rest/action/delete/RestDeleteAction.java | 23 +-- .../rest/action/index/RestIndexAction.java | 27 +-- .../rest/action/update/RestUpdateAction.java | 34 +--- .../BaseTransportResponseHandler.java | 2 +- .../BroadcastReplicationTests.java | 44 +++-- .../ClusterStateCreationUtils.java | 11 +- .../TransportReplicationActionTests.java | 35 +++- .../cluster/routing/ShardRoutingTests.java | 6 + .../elasticsearch/document/ShardInfoIT.java | 11 +- .../index/engine/InternalEngineTests.java | 152 +++++++++-------- .../index/engine/ShadowEngineTests.java | 7 +- .../index/indexing/IndexingSlowLogTests.java | 3 +- .../internal/FieldNamesFieldMapperTests.java | 6 +- .../index/shard/IndexShardTests.java | 37 ++++- .../index/translog/TranslogTests.java | 10 +- .../flush/SyncedFlushSingleNodeTests.java | 2 +- .../routing/SimpleRoutingIT.java | 2 +- .../TransportDeleteByQueryActionTests.java | 7 +- 52 files changed, 1095 insertions(+), 707 deletions(-) create mode 100644 core/src/main/java/org/elasticsearch/action/DocWriteResponse.java rename core/src/main/java/org/elasticsearch/action/{ActionWriteResponse.java => ReplicationResponse.java} (96%) create mode 100644 core/src/main/java/org/elasticsearch/index/mapper/internal/SeqNoFieldMapper.java create mode 100644 core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java diff --git a/core/src/main/java/org/elasticsearch/action/DocWriteResponse.java b/core/src/main/java/org/elasticsearch/action/DocWriteResponse.java new file mode 100644 index 0000000000000..bb63075a0b902 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/action/DocWriteResponse.java @@ -0,0 +1,143 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.action; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentBuilderString; +import org.elasticsearch.index.seqno.SequenceNumbersService; +import org.elasticsearch.index.shard.ShardId; + +import java.io.IOException; + +/** + * A base class for the response of a write operation that involves a single doc + */ +public abstract class DocWriteResponse extends ReplicationResponse implements ToXContent { + + private ShardId shardId; + private String id; + private String type; + private long version; + private long seqNo; + + public DocWriteResponse(ShardId shardId, String type, String id, long seqNo, long version) { + this.shardId = shardId; + this.type = type; + this.id = id; + this.seqNo = seqNo; + this.version = version; + } + + // needed for deserialization + protected DocWriteResponse() { + } + + /** + * The index the document was changed in. + */ + public String getIndex() { + return this.shardId.getIndex(); + } + + + /** + * The exact shard the document was changed in. + */ + public ShardId getShardId() { + return this.shardId; + } + + /** + * The type of the document changed. 
+ */ + public String getType() { + return this.type; + } + + /** + * The id of the document changed. + */ + public String getId() { + return this.id; + } + + /** + * Returns the current version of the doc. + */ + public long getVersion() { + return this.version; + } + + /** + * Returns the sequence number assigned for this change. Returns {@link SequenceNumbersService#UNASSIGNED_SEQ_NO} if the operation wasn't + * performed (i.e., an update operation that resulted in a NOOP). + */ + public long getSeqNo() { + return seqNo; + } + + + @Override + public void readFrom(StreamInput in) throws IOException { + super.readFrom(in); + shardId = ShardId.readShardId(in); + type = in.readString(); + id = in.readString(); + version = in.readZLong(); + seqNo = in.readZLong(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + shardId.writeTo(out); + out.writeString(type); + out.writeString(id); + out.writeZLong(version); + out.writeZLong(seqNo); + } + + static final class Fields { + static final XContentBuilderString _INDEX = new XContentBuilderString("_index"); + static final XContentBuilderString _TYPE = new XContentBuilderString("_type"); + static final XContentBuilderString _ID = new XContentBuilderString("_id"); + static final XContentBuilderString _VERSION = new XContentBuilderString("_version"); + static final XContentBuilderString _SHARD_ID = new XContentBuilderString("_shard_id"); + static final XContentBuilderString _SEQ_NO = new XContentBuilderString("_seq_no"); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + ReplicationResponse.ShardInfo shardInfo = getShardInfo(); + builder.field(Fields._INDEX, getIndex()) + .field(Fields._TYPE, getType()) + .field(Fields._ID, getId()) + .field(Fields._VERSION, getVersion()); + //nocommit: i'm not sure we want to expose it in the api but it will be handy for debugging while we work... 
+ builder.field(Fields._SHARD_ID, shardId.id()); + if (getSeqNo() >= 0) { + builder.field(Fields._SEQ_NO, getSeqNo()); + } + shardInfo.toXContent(builder, params); + return builder; + } +} diff --git a/core/src/main/java/org/elasticsearch/action/ActionWriteResponse.java b/core/src/main/java/org/elasticsearch/action/ReplicationResponse.java similarity index 96% rename from core/src/main/java/org/elasticsearch/action/ActionWriteResponse.java rename to core/src/main/java/org/elasticsearch/action/ReplicationResponse.java index f4152ac85e4aa..4e358c8d42a73 100644 --- a/core/src/main/java/org/elasticsearch/action/ActionWriteResponse.java +++ b/core/src/main/java/org/elasticsearch/action/ReplicationResponse.java @@ -21,7 +21,6 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ExceptionsHelper; -import org.elasticsearch.bootstrap.Elasticsearch; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; @@ -30,25 +29,23 @@ import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilderString; -import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.rest.RestStatus; import java.io.IOException; -import java.util.Collections; /** * Base class for write action responses. 
*/ -public class ActionWriteResponse extends ActionResponse { +public class ReplicationResponse extends ActionResponse { - public final static ActionWriteResponse.ShardInfo.Failure[] EMPTY = new ActionWriteResponse.ShardInfo.Failure[0]; + public final static ReplicationResponse.ShardInfo.Failure[] EMPTY = new ReplicationResponse.ShardInfo.Failure[0]; private ShardInfo shardInfo; @Override public void readFrom(StreamInput in) throws IOException { super.readFrom(in); - shardInfo = ActionWriteResponse.ShardInfo.readShardInfo(in); + shardInfo = ReplicationResponse.ShardInfo.readShardInfo(in); } @Override diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/flush/TransportFlushAction.java b/core/src/main/java/org/elasticsearch/action/admin/indices/flush/TransportFlushAction.java index ac159625420e7..bbcaf7ef5398f 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/flush/TransportFlushAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/flush/TransportFlushAction.java @@ -19,7 +19,7 @@ package org.elasticsearch.action.admin.indices.flush; -import org.elasticsearch.action.ActionWriteResponse; +import org.elasticsearch.action.ReplicationResponse; import org.elasticsearch.action.ShardOperationFailedException; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.replication.TransportBroadcastReplicationAction; @@ -36,7 +36,7 @@ /** * Flush Action. 
*/ -public class TransportFlushAction extends TransportBroadcastReplicationAction { +public class TransportFlushAction extends TransportBroadcastReplicationAction { @Inject public TransportFlushAction(Settings settings, ThreadPool threadPool, ClusterService clusterService, @@ -47,8 +47,8 @@ public TransportFlushAction(Settings settings, ThreadPool threadPool, ClusterSer } @Override - protected ActionWriteResponse newShardResponse() { - return new ActionWriteResponse(); + protected ReplicationResponse newShardResponse() { + return new ReplicationResponse(); } @Override diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/flush/TransportShardFlushAction.java b/core/src/main/java/org/elasticsearch/action/admin/indices/flush/TransportShardFlushAction.java index f768cfedc9444..7e8bd9eb8cbd6 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/flush/TransportShardFlushAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/flush/TransportShardFlushAction.java @@ -19,7 +19,7 @@ package org.elasticsearch.action.admin.indices.flush; -import org.elasticsearch.action.ActionWriteResponse; +import org.elasticsearch.action.ReplicationResponse; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.replication.TransportReplicationAction; import org.elasticsearch.cluster.ClusterService; @@ -42,7 +42,7 @@ /** * */ -public class TransportShardFlushAction extends TransportReplicationAction { +public class TransportShardFlushAction extends TransportReplicationAction { public static final String NAME = FlushAction.NAME + "[s]"; @@ -56,16 +56,16 @@ public TransportShardFlushAction(Settings settings, TransportService transportSe } @Override - protected ActionWriteResponse newResponseInstance() { - return new ActionWriteResponse(); + protected ReplicationResponse newResponseInstance() { + return new ReplicationResponse(); } @Override - protected Tuple shardOperationOnPrimary(ClusterState 
clusterState, PrimaryOperationRequest shardRequest) throws Throwable { + protected Tuple shardOperationOnPrimary(ClusterState clusterState, PrimaryOperationRequest shardRequest) throws Throwable { IndexShard indexShard = indicesService.indexServiceSafe(shardRequest.shardId.getIndex()).getShard(shardRequest.shardId.id()); indexShard.flush(shardRequest.request.getRequest()); logger.trace("{} flush request executed on primary", indexShard.shardId()); - return new Tuple<>(new ActionWriteResponse(), shardRequest.request); + return new Tuple<>(new ReplicationResponse(), shardRequest.request); } @Override diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportRefreshAction.java b/core/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportRefreshAction.java index e2d978d306f26..55b3696c10bcf 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportRefreshAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportRefreshAction.java @@ -19,7 +19,7 @@ package org.elasticsearch.action.admin.indices.refresh; -import org.elasticsearch.action.ActionWriteResponse; +import org.elasticsearch.action.ReplicationResponse; import org.elasticsearch.action.ShardOperationFailedException; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.replication.ReplicationRequest; @@ -37,7 +37,7 @@ /** * Refresh action. 
*/ -public class TransportRefreshAction extends TransportBroadcastReplicationAction { +public class TransportRefreshAction extends TransportBroadcastReplicationAction { @Inject public TransportRefreshAction(Settings settings, ThreadPool threadPool, ClusterService clusterService, @@ -48,8 +48,8 @@ public TransportRefreshAction(Settings settings, ThreadPool threadPool, ClusterS } @Override - protected ActionWriteResponse newShardResponse() { - return new ActionWriteResponse(); + protected ReplicationResponse newShardResponse() { + return new ReplicationResponse(); } @Override diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportShardRefreshAction.java b/core/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportShardRefreshAction.java index a06483acb1d0d..229475b80f786 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportShardRefreshAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportShardRefreshAction.java @@ -19,7 +19,7 @@ package org.elasticsearch.action.admin.indices.refresh; -import org.elasticsearch.action.ActionWriteResponse; +import org.elasticsearch.action.ReplicationResponse; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.replication.ReplicationRequest; import org.elasticsearch.action.support.replication.TransportReplicationAction; @@ -43,7 +43,7 @@ /** * */ -public class TransportShardRefreshAction extends TransportReplicationAction { +public class TransportShardRefreshAction extends TransportReplicationAction { public static final String NAME = RefreshAction.NAME + "[s]"; @@ -57,16 +57,16 @@ public TransportShardRefreshAction(Settings settings, TransportService transport } @Override - protected ActionWriteResponse newResponseInstance() { - return new ActionWriteResponse(); + protected ReplicationResponse newResponseInstance() { + return new ReplicationResponse(); } @Override - 
protected Tuple shardOperationOnPrimary(ClusterState clusterState, PrimaryOperationRequest shardRequest) throws Throwable { + protected Tuple shardOperationOnPrimary(ClusterState clusterState, PrimaryOperationRequest shardRequest) throws Throwable { IndexShard indexShard = indicesService.indexServiceSafe(shardRequest.shardId.getIndex()).getShard(shardRequest.shardId.id()); indexShard.refresh("api"); logger.trace("{} refresh request executed on primary", indexShard.shardId()); - return new Tuple<>(new ActionWriteResponse(), shardRequest.request); + return new Tuple<>(new ReplicationResponse(), shardRequest.request); } @Override diff --git a/core/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java b/core/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java index 80e86eaaf1719..7b6be2d8dbfc7 100644 --- a/core/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java +++ b/core/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java @@ -20,7 +20,7 @@ package org.elasticsearch.action.bulk; import org.elasticsearch.ExceptionsHelper; -import org.elasticsearch.action.ActionWriteResponse; +import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.action.delete.DeleteResponse; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.action.update.UpdateResponse; @@ -99,7 +99,7 @@ public Throwable getCause() { private String opType; - private ActionWriteResponse response; + private DocWriteResponse response; private Failure failure; @@ -107,7 +107,7 @@ public Throwable getCause() { } - public BulkItemResponse(int id, String opType, ActionWriteResponse response) { + public BulkItemResponse(int id, String opType, DocWriteResponse response) { this.id = id; this.opType = opType; this.response = response; @@ -140,14 +140,7 @@ public String getIndex() { if (failure != null) { return failure.getIndex(); } - if (response instanceof IndexResponse) { - return ((IndexResponse) response).getIndex(); - } 
else if (response instanceof DeleteResponse) { - return ((DeleteResponse) response).getIndex(); - } else if (response instanceof UpdateResponse) { - return ((UpdateResponse) response).getIndex(); - } - return null; + return response.getIndex(); } /** @@ -157,14 +150,7 @@ public String getType() { if (failure != null) { return failure.getType(); } - if (response instanceof IndexResponse) { - return ((IndexResponse) response).getType(); - } else if (response instanceof DeleteResponse) { - return ((DeleteResponse) response).getType(); - } else if (response instanceof UpdateResponse) { - return ((UpdateResponse) response).getType(); - } - return null; + return response.getType(); } /** @@ -174,14 +160,7 @@ public String getId() { if (failure != null) { return failure.getId(); } - if (response instanceof IndexResponse) { - return ((IndexResponse) response).getId(); - } else if (response instanceof DeleteResponse) { - return ((DeleteResponse) response).getId(); - } else if (response instanceof UpdateResponse) { - return ((UpdateResponse) response).getId(); - } - return null; + return response.getId(); } /** @@ -191,21 +170,14 @@ public long getVersion() { if (failure != null) { return -1; } - if (response instanceof IndexResponse) { - return ((IndexResponse) response).getVersion(); - } else if (response instanceof DeleteResponse) { - return ((DeleteResponse) response).getVersion(); - } else if (response instanceof UpdateResponse) { - return ((UpdateResponse) response).getVersion(); - } - return -1; + return response.getVersion(); } /** * The actual response ({@link IndexResponse} or {@link DeleteResponse}). null in * case of failure. 
*/ - public T getResponse() { + public T getResponse() { return (T) response; } diff --git a/core/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java b/core/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java index 6b08627f5de60..76c80a9b0640a 100644 --- a/core/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java +++ b/core/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java @@ -19,7 +19,7 @@ package org.elasticsearch.action.bulk; -import org.elasticsearch.action.ActionWriteResponse; +import org.elasticsearch.action.ReplicationResponse; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.index.shard.ShardId; @@ -29,7 +29,7 @@ /** * */ -public class BulkShardResponse extends ActionWriteResponse { +public class BulkShardResponse extends ReplicationResponse { private ShardId shardId; private BulkItemResponse[] responses; diff --git a/core/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java b/core/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java index 0f00b87b12a3f..2283f779ef5eb 100644 --- a/core/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java +++ b/core/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java @@ -25,8 +25,10 @@ import org.elasticsearch.action.RoutingMissingException; import org.elasticsearch.action.delete.DeleteRequest; import org.elasticsearch.action.delete.DeleteResponse; +import org.elasticsearch.action.delete.TransportDeleteAction; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.index.IndexResponse; +import org.elasticsearch.action.index.TransportIndexAction; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.replication.TransportReplicationAction; import org.elasticsearch.action.update.UpdateHelper; @@ -134,6 +136,9 @@ protected Tuple 
shardOperationOnPrimary(Clu IndexResponse indexResponse = result.response(); setResponse(item, new BulkItemResponse(item.id(), indexRequest.opType().lowercase(), indexResponse)); } catch (Throwable e) { + // nocommit: since we now have RetryOnPrimaryException, retrying doesn't always mean the shard is closed. + // some operations were already performed and have a seqno assigned. we shouldn't just reindex them + // if we have a pending mapping update // rethrow the failure if we are going to retry on primary and let parent failure to handle it if (retryPrimaryException(e)) { // restore updated versions... @@ -164,11 +169,13 @@ protected Tuple shardOperationOnPrimary(Clu try { // add the response - final WriteResult writeResult = shardDeleteOperation(request, deleteRequest, indexShard); + final WriteResult writeResult = TransportDeleteAction.executeDeleteRequestOnPrimary(deleteRequest, indexShard); DeleteResponse deleteResponse = writeResult.response(); location = locationToSync(location, writeResult.location); setResponse(item, new BulkItemResponse(item.id(), OP_TYPE_DELETE, deleteResponse)); } catch (Throwable e) { + // nocommit: since we now have RetryOnPrimaryException, retrying doesn't always mean the shard is closed. + // some operations were already performed and have a seqno assigned. we shouldn't just reindex them // rethrow the failure if we are going to retry on primary and let parent failure to handle it if (retryPrimaryException(e)) { // restore updated versions... 
@@ -216,7 +223,8 @@ protected Tuple shardOperationOnPrimary(Clu BytesReference indexSourceAsBytes = indexRequest.source(); // add the response IndexResponse indexResponse = result.response(); - UpdateResponse updateResponse = new UpdateResponse(indexResponse.getShardInfo(), indexResponse.getIndex(), indexResponse.getType(), indexResponse.getId(), indexResponse.getVersion(), indexResponse.isCreated()); + UpdateResponse updateResponse = new UpdateResponse(indexResponse.getShardInfo(), indexResponse.getShardId(), + indexResponse.getType(), indexResponse.getId(), indexResponse.getSeqNo(), indexResponse.getVersion(), indexResponse.isCreated()); if (updateRequest.fields() != null && updateRequest.fields().length > 0) { Tuple> sourceAndContent = XContentHelper.convertToMap(indexSourceAsBytes, true); updateResponse.setGetResult(updateHelper.extractGetResult(updateRequest, shardRequest.request.index(), indexResponse.getVersion(), sourceAndContent.v2(), sourceAndContent.v1(), indexSourceAsBytes)); @@ -228,7 +236,8 @@ protected Tuple shardOperationOnPrimary(Clu WriteResult writeResult = updateResult.writeResult; DeleteResponse response = writeResult.response(); DeleteRequest deleteRequest = updateResult.request(); - updateResponse = new UpdateResponse(response.getShardInfo(), response.getIndex(), response.getType(), response.getId(), response.getVersion(), false); + updateResponse = new UpdateResponse(response.getShardInfo(), response.getShardId(), response.getType(), + response.getId(), response.getSeqNo(), response.getVersion(), false); updateResponse.setGetResult(updateHelper.extractGetResult(updateRequest, shardRequest.request.index(), response.getVersion(), updateResult.result.updatedSourceAsMap(), updateResult.result.updateSourceContentType(), null)); // Replace the update request to the translated delete request to execute on the replica. 
item = request.items()[requestIndex] = new BulkItemRequest(request.items()[requestIndex].id(), deleteRequest); @@ -250,6 +259,8 @@ protected Tuple shardOperationOnPrimary(Clu new BulkItemResponse.Failure(request.index(), updateRequest.type(), updateRequest.id(), t))); } } else { + // nocommit: since we now have RetryOnPrimaryException, retrying doesn't always mean the shard is closed. + // some operations were already performed and have a seqno assigned. we shouldn't just reindex them // rethrow the failure if we are going to retry on primary and let parent failure to handle it if (retryPrimaryException(t)) { // restore updated versions... @@ -304,7 +315,7 @@ protected Tuple shardOperationOnPrimary(Clu assert preVersionTypes[requestIndex] != null; } - processAfter(request.refresh(), indexShard, location); + processAfterWrite(request.refresh(), indexShard, location); BulkItemResponse[] responses = new BulkItemResponse[request.items().length]; BulkItemRequest[] items = request.items(); for (int i = 0; i < items.length; i++) { @@ -320,7 +331,7 @@ private void setResponse(BulkItemRequest request, BulkItemResponse response) { } } - private WriteResult shardIndexOperation(BulkShardRequest request, IndexRequest indexRequest, ClusterState clusterState, + private WriteResult shardIndexOperation(BulkShardRequest request, IndexRequest indexRequest, ClusterState clusterState, IndexShard indexShard, boolean processed) throws Throwable { // validate, if routing is required, that we got routing @@ -335,20 +346,7 @@ private WriteResult shardIndexOperation(BulkShardRequest request, IndexRequest i indexRequest.process(clusterState.metaData(), mappingMd, allowIdGeneration, request.index()); } - return executeIndexRequestOnPrimary(request, indexRequest, indexShard); - } - - private WriteResult shardDeleteOperation(BulkShardRequest request, DeleteRequest deleteRequest, IndexShard indexShard) { - Engine.Delete delete = indexShard.prepareDelete(deleteRequest.type(), deleteRequest.id(), 
deleteRequest.version(), deleteRequest.versionType(), Engine.Operation.Origin.PRIMARY); - indexShard.delete(delete); - // update the request with the version so it will go to the replicas - deleteRequest.versionType(delete.versionType().versionTypeForReplicationAndRecovery()); - deleteRequest.version(delete.version()); - - assert deleteRequest.versionType().validateVersionForWrites(deleteRequest.version()); - - DeleteResponse deleteResponse = new DeleteResponse(request.index(), deleteRequest.type(), deleteRequest.id(), delete.version(), delete.found()); - return new WriteResult(deleteResponse, delete.getTranslogLocation()); + return TransportIndexAction.executeIndexRequestOnPrimary(indexRequest, indexShard, mappingUpdatedAction); } static class UpdateResult { @@ -424,7 +422,7 @@ private UpdateResult shardUpdateOperation(ClusterState clusterState, BulkShardRe case DELETE: DeleteRequest deleteRequest = translate.action(); try { - WriteResult result = shardDeleteOperation(bulkShardRequest, deleteRequest, indexShard); + WriteResult result = TransportDeleteAction.executeDeleteRequestOnPrimary(deleteRequest, indexShard); return new UpdateResult(translate, deleteRequest, result); } catch (Throwable t) { t = ExceptionsHelper.unwrapCause(t); @@ -460,7 +458,8 @@ protected void shardOperationOnReplica(ShardId shardId, BulkShardRequest request SourceToParse sourceToParse = SourceToParse.source(SourceToParse.Origin.REPLICA, indexRequest.source()).index(shardId.getIndex()).type(indexRequest.type()).id(indexRequest.id()) .routing(indexRequest.routing()).parent(indexRequest.parent()).timestamp(indexRequest.timestamp()).ttl(indexRequest.ttl()); - final Engine.Index operation = indexShard.prepareIndex(sourceToParse, indexRequest.version(), indexRequest.versionType(), Engine.Operation.Origin.REPLICA); + final Engine.Index operation = indexShard.prepareIndexOnReplica(sourceToParse, + indexRequest.seqNo(), indexRequest.version(), indexRequest.versionType()); Mapping update = 
operation.parsedDoc().dynamicMappingsUpdate(); if (update != null) { throw new RetryOnReplicaException(shardId, "Mappings are not available on the replica yet, triggered update: " + update); @@ -477,7 +476,8 @@ protected void shardOperationOnReplica(ShardId shardId, BulkShardRequest request } else if (item.request() instanceof DeleteRequest) { DeleteRequest deleteRequest = (DeleteRequest) item.request(); try { - Engine.Delete delete = indexShard.prepareDelete(deleteRequest.type(), deleteRequest.id(), deleteRequest.version(), deleteRequest.versionType(), Engine.Operation.Origin.REPLICA); + Engine.Delete delete = indexShard.prepareDeleteOnReplica(deleteRequest.type(), deleteRequest.id(), + deleteRequest.seqNo(), deleteRequest.version(), deleteRequest.versionType()); indexShard.delete(delete); location = locationToSync(location, delete.getTranslogLocation()); } catch (Throwable e) { @@ -492,7 +492,7 @@ protected void shardOperationOnReplica(ShardId shardId, BulkShardRequest request } } - processAfter(request.refresh(), indexShard, location); + processAfterWrite(request.refresh(), indexShard, location); } private void applyVersion(BulkItemRequest item, long version, VersionType versionType) { diff --git a/core/src/main/java/org/elasticsearch/action/delete/DeleteResponse.java b/core/src/main/java/org/elasticsearch/action/delete/DeleteResponse.java index 26cfa57a13d39..1b79d96f114d3 100644 --- a/core/src/main/java/org/elasticsearch/action/delete/DeleteResponse.java +++ b/core/src/main/java/org/elasticsearch/action/delete/DeleteResponse.java @@ -19,9 +19,12 @@ package org.elasticsearch.action.delete; -import org.elasticsearch.action.ActionWriteResponse; +import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentBuilderString; +import 
org.elasticsearch.index.shard.ShardId; import java.io.IOException; @@ -31,54 +34,19 @@ * @see org.elasticsearch.action.delete.DeleteRequest * @see org.elasticsearch.client.Client#delete(DeleteRequest) */ -public class DeleteResponse extends ActionWriteResponse { +public class DeleteResponse extends DocWriteResponse { - private String index; - private String id; - private String type; - private long version; private boolean found; public DeleteResponse() { } - public DeleteResponse(String index, String type, String id, long version, boolean found) { - this.index = index; - this.id = id; - this.type = type; - this.version = version; + public DeleteResponse(ShardId shardId, String type, String id, long seqNo, long version, boolean found) { + super(shardId, type, id, seqNo, version); this.found = found; } - /** - * The index the document was deleted from. - */ - public String getIndex() { - return this.index; - } - - /** - * The type of the document deleted. - */ - public String getType() { - return this.type; - } - - /** - * The id of the document deleted. - */ - public String getId() { - return this.id; - } - - /** - * The version of the delete operation. - */ - public long getVersion() { - return this.version; - } - /** * Returns true if a doc was found to delete. 
*/ @@ -89,20 +57,23 @@ public boolean isFound() { @Override public void readFrom(StreamInput in) throws IOException { super.readFrom(in); - index = in.readString(); - type = in.readString(); - id = in.readString(); - version = in.readLong(); found = in.readBoolean(); } @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); - out.writeString(index); - out.writeString(type); - out.writeString(id); - out.writeLong(version); out.writeBoolean(found); } + + static final class Fields { + static final XContentBuilderString FOUND = new XContentBuilderString("found"); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.field(Fields.FOUND, isFound()); + super.toXContent(builder, params); + return builder; + } } diff --git a/core/src/main/java/org/elasticsearch/action/delete/TransportDeleteAction.java b/core/src/main/java/org/elasticsearch/action/delete/TransportDeleteAction.java index 227d8728dbe97..29d23f7cb5a3a 100644 --- a/core/src/main/java/org/elasticsearch/action/delete/TransportDeleteAction.java +++ b/core/src/main/java/org/elasticsearch/action/delete/TransportDeleteAction.java @@ -130,26 +130,34 @@ protected DeleteResponse newResponseInstance() { protected Tuple shardOperationOnPrimary(ClusterState clusterState, PrimaryOperationRequest shardRequest) { DeleteRequest request = shardRequest.request; IndexShard indexShard = indicesService.indexServiceSafe(shardRequest.shardId.getIndex()).getShard(shardRequest.shardId.id()); - Engine.Delete delete = indexShard.prepareDelete(request.type(), request.id(), request.version(), request.versionType(), Engine.Operation.Origin.PRIMARY); + final WriteResult result = executeDeleteRequestOnPrimary(request, indexShard); + + processAfterWrite(request.refresh(), indexShard, result.location); + + return new Tuple<>(result.response, shardRequest.request); + } + + public static WriteResult executeDeleteRequestOnPrimary(DeleteRequest 
request, IndexShard indexShard) { + Engine.Delete delete = indexShard.prepareDeleteOnPrimary(request.type(), request.id(), request.version(), request.versionType()); indexShard.delete(delete); // update the request with the version so it will go to the replicas request.versionType(delete.versionType().versionTypeForReplicationAndRecovery()); request.version(delete.version()); + request.seqNo(delete.seqNo()); assert request.versionType().validateVersionForWrites(request.version()); - processAfter(request.refresh(), indexShard, delete.getTranslogLocation()); - - DeleteResponse response = new DeleteResponse(shardRequest.shardId.getIndex(), request.type(), request.id(), delete.version(), delete.found()); - return new Tuple<>(response, shardRequest.request); + return new WriteResult<>(new DeleteResponse(indexShard.shardId(), request.type(), request.id(), + delete.seqNo(), delete.version(), delete.found()), delete.getTranslogLocation()); } @Override protected void shardOperationOnReplica(ShardId shardId, DeleteRequest request) { IndexShard indexShard = indicesService.indexServiceSafe(shardId.getIndex()).getShard(shardId.id()); - Engine.Delete delete = indexShard.prepareDelete(request.type(), request.id(), request.version(), request.versionType(), Engine.Operation.Origin.REPLICA); + Engine.Delete delete = indexShard.prepareDeleteOnReplica(request.type(), request.id(), + request.seqNo(), request.version(), request.versionType()); indexShard.delete(delete); - processAfter(request.refresh(), indexShard, delete.getTranslogLocation()); + processAfterWrite(request.refresh(), indexShard, delete.getTranslogLocation()); } @Override diff --git a/core/src/main/java/org/elasticsearch/action/index/IndexResponse.java b/core/src/main/java/org/elasticsearch/action/index/IndexResponse.java index 5727b2b673b97..5765a7463731b 100644 --- a/core/src/main/java/org/elasticsearch/action/index/IndexResponse.java +++ b/core/src/main/java/org/elasticsearch/action/index/IndexResponse.java @@ -19,9 
+19,12 @@ package org.elasticsearch.action.index; -import org.elasticsearch.action.ActionWriteResponse; +import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentBuilderString; +import org.elasticsearch.index.shard.ShardId; import java.io.IOException; @@ -31,54 +34,18 @@ * @see org.elasticsearch.action.index.IndexRequest * @see org.elasticsearch.client.Client#index(IndexRequest) */ -public class IndexResponse extends ActionWriteResponse { +public class IndexResponse extends DocWriteResponse { - private String index; - private String id; - private String type; - private long version; private boolean created; public IndexResponse() { - } - public IndexResponse(String index, String type, String id, long version, boolean created) { - this.index = index; - this.id = id; - this.type = type; - this.version = version; + public IndexResponse(ShardId shardId, String type, String id, long seqNo, long version, boolean created) { + super(shardId, type, id, seqNo, version); this.created = created; } - /** - * The index the document was indexed into. - */ - public String getIndex() { - return this.index; - } - - /** - * The type of the document indexed. - */ - public String getType() { - return this.type; - } - - /** - * The id of the document indexed. - */ - public String getId() { - return this.id; - } - - /** - * Returns the current version of the doc indexed. - */ - public long getVersion() { - return this.version; - } - /** * Returns true if the document was created, false if updated. 
*/ @@ -89,20 +56,12 @@ public boolean isCreated() { @Override public void readFrom(StreamInput in) throws IOException { super.readFrom(in); - index = in.readString(); - type = in.readString(); - id = in.readString(); - version = in.readLong(); created = in.readBoolean(); } @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); - out.writeString(index); - out.writeString(type); - out.writeString(id); - out.writeLong(version); out.writeBoolean(created); } @@ -110,12 +69,24 @@ public void writeTo(StreamOutput out) throws IOException { public String toString() { StringBuilder builder = new StringBuilder(); builder.append("IndexResponse["); - builder.append("index=").append(index); - builder.append(",type=").append(type); - builder.append(",id=").append(id); - builder.append(",version=").append(version); + builder.append("index=").append(getIndex()); + builder.append(",type=").append(getType()); + builder.append(",id=").append(getId()); + builder.append(",version=").append(getVersion()); builder.append(",created=").append(created); + builder.append(",seqNo=").append(getSeqNo()); builder.append(",shards=").append(getShardInfo()); return builder.append("]").toString(); } + + static final class Fields { + static final XContentBuilderString CREATED = new XContentBuilderString("created"); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + super.toXContent(builder, params); + builder.field(Fields.CREATED, isCreated()); + return builder; + } } diff --git a/core/src/main/java/org/elasticsearch/action/index/TransportIndexAction.java b/core/src/main/java/org/elasticsearch/action/index/TransportIndexAction.java index 63b82377d8a47..015b6a702f778 100644 --- a/core/src/main/java/org/elasticsearch/action/index/TransportIndexAction.java +++ b/core/src/main/java/org/elasticsearch/action/index/TransportIndexAction.java @@ -166,11 +166,11 @@ protected Tuple 
shardOperationOnPrimary(ClusterStat IndexService indexService = indicesService.indexServiceSafe(shardRequest.shardId.getIndex()); IndexShard indexShard = indexService.getShard(shardRequest.shardId.id()); - final WriteResult result = executeIndexRequestOnPrimary(null, request, indexShard); + final WriteResult result = executeIndexRequestOnPrimary(request, indexShard, mappingUpdatedAction); final IndexResponse response = result.response; final Translog.Location location = result.location; - processAfter(request.refresh(), indexShard, location); + processAfterWrite(request.refresh(), indexShard, location); return new Tuple<>(response, shardRequest.request); } @@ -181,13 +181,51 @@ protected void shardOperationOnReplica(ShardId shardId, IndexRequest request) { SourceToParse sourceToParse = SourceToParse.source(SourceToParse.Origin.REPLICA, request.source()).index(shardId.getIndex()).type(request.type()).id(request.id()) .routing(request.routing()).parent(request.parent()).timestamp(request.timestamp()).ttl(request.ttl()); - final Engine.Index operation = indexShard.prepareIndex(sourceToParse, request.version(), request.versionType(), Engine.Operation.Origin.REPLICA); + final Engine.Index operation = indexShard.prepareIndexOnReplica(sourceToParse, request.seqNo(), request.version(), request.versionType()); Mapping update = operation.parsedDoc().dynamicMappingsUpdate(); if (update != null) { throw new RetryOnReplicaException(shardId, "Mappings are not available on the replica yet, triggered update: " + update); } indexShard.index(operation); - processAfter(request.refresh(), indexShard, operation.getTranslogLocation()); + processAfterWrite(request.refresh(), indexShard, operation.getTranslogLocation()); } + /** utility method to prepare indexing operations on the primary */ + public static Engine.Index prepareIndexOperationOnPrimary(IndexRequest request, IndexShard indexShard) { + SourceToParse sourceToParse = SourceToParse.source(SourceToParse.Origin.PRIMARY, 
request.source()).index(request.index()).type(request.type()).id(request.id()) + .routing(request.routing()).parent(request.parent()).timestamp(request.timestamp()).ttl(request.ttl()); + return indexShard.prepareIndexOnPrimary(sourceToParse, request.version(), request.versionType()); + + } + + /** + * Execute the given {@link IndexRequest} on a primary shard, throwing a + * {@link RetryOnPrimaryException} if the operation needs to be re-tried. + */ + public static WriteResult executeIndexRequestOnPrimary(IndexRequest request, IndexShard indexShard, MappingUpdatedAction mappingUpdatedAction) throws Throwable { + Engine.Index operation = prepareIndexOperationOnPrimary(request, indexShard); + Mapping update = operation.parsedDoc().dynamicMappingsUpdate(); + final ShardId shardId = indexShard.shardId(); + if (update != null) { + final String indexName = shardId.getIndex(); + mappingUpdatedAction.updateMappingOnMasterSynchronously(indexName, request.type(), update); + operation = prepareIndexOperationOnPrimary(request, indexShard); + update = operation.parsedDoc().dynamicMappingsUpdate(); + if (update != null) { + throw new RetryOnPrimaryException(shardId, + "Dynamics mappings are not available on the node that holds the primary yet"); + } + } + final boolean created = indexShard.index(operation); + + // update the version on request so it will happen on the replicas + final long version = operation.version(); + request.version(version); + request.versionType(request.versionType().versionTypeForReplicationAndRecovery()); + request.seqNo(operation.seqNo()); + + assert request.versionType().validateVersionForWrites(request.version()); + + return new WriteResult<>(new IndexResponse(shardId, request.type(), request.id(), request.seqNo(), request.version(), created), operation.getTranslogLocation()); + } } diff --git a/core/src/main/java/org/elasticsearch/action/support/replication/ReplicationRequest.java 
b/core/src/main/java/org/elasticsearch/action/support/replication/ReplicationRequest.java index c629a70d6f973..e7c7664d10a38 100644 --- a/core/src/main/java/org/elasticsearch/action/support/replication/ReplicationRequest.java +++ b/core/src/main/java/org/elasticsearch/action/support/replication/ReplicationRequest.java @@ -44,6 +44,9 @@ public class ReplicationRequest extends ActionRequ ShardId internalShardId; + long seqNo; + long primaryTerm; + protected TimeValue timeout = DEFAULT_TIMEOUT; protected String index; @@ -76,6 +79,9 @@ protected ReplicationRequest(T request, ActionRequest originalRequest) { this.timeout = request.timeout(); this.index = request.index(); this.consistencyLevel = request.consistencyLevel(); + this.internalShardId = request.internalShardId; + this.seqNo = request.seqNo; + this.primaryTerm = request.primaryTerm; } /** @@ -141,6 +147,29 @@ public final T consistencyLevel(WriteConsistencyLevel consistencyLevel) { return (T) this; } + /** + * Returns the sequence number for this operation. The sequence number is assigned while the operation + * is performed on the primary shard. + */ + public long seqNo() { + return seqNo; + } + + /** sets the sequence number for this operation. 
should only be called on the primary shard */ + public void seqNo(long seqNo) { + this.seqNo = seqNo; + } + + /** returns the primary term active at the time the operation was performed on the primary shard */ + public long primaryTerm() { + return primaryTerm; + } + + /** marks the primary term in which the operation was performed */ + public void primaryTerm(long term) { + primaryTerm = term; + } + @Override public ActionRequestValidationException validate() { ActionRequestValidationException validationException = null; @@ -161,6 +190,8 @@ public void readFrom(StreamInput in) throws IOException { consistencyLevel = WriteConsistencyLevel.fromId(in.readByte()); timeout = TimeValue.readTimeValue(in); index = in.readString(); + seqNo = in.readVLong(); + primaryTerm = in.readVLong(); } @Override @@ -175,6 +206,8 @@ public void writeTo(StreamOutput out) throws IOException { out.writeByte(consistencyLevel.id()); timeout.writeTo(out); out.writeString(index); + out.writeVLong(seqNo); + out.writeVLong(primaryTerm); } public T setShardId(ShardId shardId) { diff --git a/core/src/main/java/org/elasticsearch/action/support/replication/TransportBroadcastReplicationAction.java b/core/src/main/java/org/elasticsearch/action/support/replication/TransportBroadcastReplicationAction.java index ddd4d42f7a68b..33a9d349e8064 100644 --- a/core/src/main/java/org/elasticsearch/action/support/replication/TransportBroadcastReplicationAction.java +++ b/core/src/main/java/org/elasticsearch/action/support/replication/TransportBroadcastReplicationAction.java @@ -22,9 +22,8 @@ import com.carrotsearch.hppc.cursors.IntObjectCursor; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.ActionWriteResponse; +import org.elasticsearch.action.ReplicationResponse; import org.elasticsearch.action.ShardOperationFailedException; -import org.elasticsearch.action.UnavailableShardsException; import 
org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.DefaultShardOperationFailedException; import org.elasticsearch.action.support.HandledTransportAction; @@ -53,7 +52,7 @@ * Base class for requests that should be executed on all shards of an index or several indices. * This action sends shard requests to all primary shards of the indices and they are then replicated like write requests */ -public abstract class TransportBroadcastReplicationAction extends HandledTransportAction { +public abstract class TransportBroadcastReplicationAction extends HandledTransportAction { private final TransportReplicationAction replicatedBroadcastShardAction; private final ClusterService clusterService; @@ -91,15 +90,15 @@ public void onFailure(Throwable e) { logger.trace("{}: got failure from {}", actionName, shardId); int totalNumCopies = clusterState.getMetaData().index(shardId.index().getName()).getNumberOfReplicas() + 1; ShardResponse shardResponse = newShardResponse(); - ActionWriteResponse.ShardInfo.Failure[] failures; + ReplicationResponse.ShardInfo.Failure[] failures; if (TransportActions.isShardNotAvailableException(e)) { - failures = new ActionWriteResponse.ShardInfo.Failure[0]; + failures = new ReplicationResponse.ShardInfo.Failure[0]; } else { - ActionWriteResponse.ShardInfo.Failure failure = new ActionWriteResponse.ShardInfo.Failure(shardId.index().name(), shardId.id(), null, e, ExceptionsHelper.status(e), true); - failures = new ActionWriteResponse.ShardInfo.Failure[totalNumCopies]; + ReplicationResponse.ShardInfo.Failure failure = new ReplicationResponse.ShardInfo.Failure(shardId.index().name(), shardId.id(), null, e, ExceptionsHelper.status(e), true); + failures = new ReplicationResponse.ShardInfo.Failure[totalNumCopies]; Arrays.fill(failures, failure); } - shardResponse.setShardInfo(new ActionWriteResponse.ShardInfo(totalNumCopies, 0, failures)); + shardResponse.setShardInfo(new ReplicationResponse.ShardInfo(totalNumCopies, 0, 
failures)); shardsResponses.add(shardResponse); if (responsesCountDown.countDown()) { finishAndNotifyListener(listener, shardsResponses); @@ -142,7 +141,7 @@ private void finishAndNotifyListener(ActionListener listener, CopyOnWriteArrayLi int totalNumCopies = 0; List shardFailures = null; for (int i = 0; i < shardsResponses.size(); i++) { - ActionWriteResponse shardResponse = shardsResponses.get(i); + ReplicationResponse shardResponse = shardsResponses.get(i); if (shardResponse == null) { // non active shard, ignore } else { @@ -152,7 +151,7 @@ private void finishAndNotifyListener(ActionListener listener, CopyOnWriteArrayLi if (shardFailures == null) { shardFailures = new ArrayList<>(); } - for (ActionWriteResponse.ShardInfo.Failure failure : shardResponse.getShardInfo().getFailures()) { + for (ReplicationResponse.ShardInfo.Failure failure : shardResponse.getShardInfo().getFailures()) { shardFailures.add(new DefaultShardOperationFailedException(new BroadcastShardOperationFailedException(new ShardId(failure.index(), failure.shardId()), failure.getCause()))); } } diff --git a/core/src/main/java/org/elasticsearch/action/support/replication/TransportReplicationAction.java b/core/src/main/java/org/elasticsearch/action/support/replication/TransportReplicationAction.java index 23bf21bd83a11..a1f5b5edcff6b 100644 --- a/core/src/main/java/org/elasticsearch/action/support/replication/TransportReplicationAction.java +++ b/core/src/main/java/org/elasticsearch/action/support/replication/TransportReplicationAction.java @@ -22,13 +22,9 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.ActionWriteResponse; +import org.elasticsearch.action.ReplicationResponse; import org.elasticsearch.action.UnavailableShardsException; import org.elasticsearch.action.WriteConsistencyLevel; -import org.elasticsearch.action.bulk.BulkShardRequest; -import 
org.elasticsearch.action.index.IndexRequest; -import org.elasticsearch.action.index.IndexRequest.OpType; -import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.TransportAction; import org.elasticsearch.action.support.TransportActions; @@ -56,10 +52,7 @@ import org.elasticsearch.common.util.concurrent.AbstractRunnable; import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.index.IndexService; -import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.engine.VersionConflictEngineException; -import org.elasticsearch.index.mapper.Mapping; -import org.elasticsearch.index.mapper.SourceToParse; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.translog.Translog; @@ -78,7 +71,7 @@ /** */ -public abstract class TransportReplicationAction extends TransportAction { +public abstract class TransportReplicationAction extends TransportAction { public static final String SHARD_FAILURE_TIMEOUT = "action.support.replication.shard.failure_timeout"; @@ -195,7 +188,7 @@ protected boolean isConflictException(Throwable e) { return false; } - protected static class WriteResult { + protected static class WriteResult { public final T response; public final Translog.Location location; @@ -206,10 +199,10 @@ public WriteResult(T response, Translog.Location location) { } @SuppressWarnings("unchecked") - public T response() { + public T response() { // this sets total, pending and failed to 0 and this is ok, because we will embed this into the replica // request and not use it - response.setShardInfo(new ActionWriteResponse.ShardInfo()); + response.setShardInfo(new ReplicationResponse.ShardInfo()); return (T) response; } @@ -314,7 +307,7 @@ protected void responseWithFailure(Throwable t) { @Override protected void doRun() throws Exception { - try (Releasable 
shardReference = getIndexShardOperationsCounter(request.internalShardId)) { + try (Releasable shardReference = getIndexShardOperationsCounter(request.internalShardId, request.primaryTerm)) { shardOperationOnReplica(request.internalShardId, request); } channel.sendResponse(TransportResponse.Empty.INSTANCE); @@ -577,9 +570,10 @@ void performOnPrimary(final ShardRouting primary, final ShardIterator shardsIt) } final ReplicationPhase replicationPhase; try { - indexShardReference = getIndexShardOperationsCounter(primary.shardId()); + indexShardReference = getIndexShardOperationsCounter(primary.shardId(), primary.primaryTerm()); PrimaryOperationRequest por = new PrimaryOperationRequest(primary.id(), internalRequest.concreteIndex(), internalRequest.request()); Tuple primaryResponse = shardOperationOnPrimary(observer.observedState(), por); + primaryResponse.v2().primaryTerm(primary.primaryTerm()); logger.trace("operation completed on primary [{}]", primary); replicationPhase = new ReplicationPhase(shardsIt, primaryResponse.v2(), primaryResponse.v1(), observer, primary, internalRequest, listener, indexShardReference, shardFailedTimeout); } catch (Throwable e) { @@ -664,10 +658,10 @@ void retryBecauseUnavailable(ShardId shardId, String message) { } - protected Releasable getIndexShardOperationsCounter(ShardId shardId) { + protected Releasable getIndexShardOperationsCounter(ShardId shardId, long opPrimaryTerm) { IndexService indexService = indicesService.indexServiceSafe(shardId.index().getName()); IndexShard indexShard = indexService.getShard(shardId.id()); - return new IndexShardReference(indexShard); + return new IndexShardReference(indexShard, opPrimaryTerm); } private void failReplicaIfNeeded(String index, int shardId, Throwable t) { @@ -961,20 +955,20 @@ private void doFinish() { if (finished.compareAndSet(false, true)) { Releasables.close(indexShardReference); final ShardId shardId = shardIt.shardId(); - final ActionWriteResponse.ShardInfo.Failure[] failuresArray; + 
final ReplicationResponse.ShardInfo.Failure[] failuresArray; if (!shardReplicaFailures.isEmpty()) { int slot = 0; - failuresArray = new ActionWriteResponse.ShardInfo.Failure[shardReplicaFailures.size()]; + failuresArray = new ReplicationResponse.ShardInfo.Failure[shardReplicaFailures.size()]; for (Map.Entry entry : shardReplicaFailures.entrySet()) { RestStatus restStatus = ExceptionsHelper.status(entry.getValue()); - failuresArray[slot++] = new ActionWriteResponse.ShardInfo.Failure( + failuresArray[slot++] = new ReplicationResponse.ShardInfo.Failure( shardId.getIndex(), shardId.getId(), entry.getKey(), entry.getValue(), restStatus, false ); } } else { - failuresArray = ActionWriteResponse.EMPTY; + failuresArray = ReplicationResponse.EMPTY; } - finalResponse.setShardInfo(new ActionWriteResponse.ShardInfo( + finalResponse.setShardInfo(new ReplicationResponse.ShardInfo( totalShards, success.get(), failuresArray @@ -1046,13 +1040,15 @@ public String concreteIndex() { } } + static class IndexShardReference implements Releasable { final private IndexShard counter; private final AtomicBoolean closed = new AtomicBoolean(false); - IndexShardReference(IndexShard counter) { - counter.incrementOperationCounter(); + IndexShardReference(IndexShard counter, long opPrimaryTerm) { + // this enforces primary terms, if we're lagging an exception will be thrown. + counter.incrementOperationCounter(opPrimaryTerm); this.counter = counter; } @@ -1064,44 +1060,8 @@ public void close() { } } - /** Utility method to create either an index or a create operation depending - * on the {@link OpType} of the request. 
*/ - private final Engine.Index prepareIndexOperationOnPrimary(BulkShardRequest shardRequest, IndexRequest request, IndexShard indexShard) { - SourceToParse sourceToParse = SourceToParse.source(SourceToParse.Origin.PRIMARY, request.source()).index(request.index()).type(request.type()).id(request.id()) - .routing(request.routing()).parent(request.parent()).timestamp(request.timestamp()).ttl(request.ttl()); - return indexShard.prepareIndex(sourceToParse, request.version(), request.versionType(), Engine.Operation.Origin.PRIMARY); - - } - - /** Execute the given {@link IndexRequest} on a primary shard, throwing a - * {@link RetryOnPrimaryException} if the operation needs to be re-tried. */ - protected final WriteResult executeIndexRequestOnPrimary(BulkShardRequest shardRequest, IndexRequest request, IndexShard indexShard) throws Throwable { - Engine.Index operation = prepareIndexOperationOnPrimary(shardRequest, request, indexShard); - Mapping update = operation.parsedDoc().dynamicMappingsUpdate(); - final ShardId shardId = indexShard.shardId(); - if (update != null) { - final String indexName = shardId.getIndex(); - mappingUpdatedAction.updateMappingOnMasterSynchronously(indexName, request.type(), update); - operation = prepareIndexOperationOnPrimary(shardRequest, request, indexShard); - update = operation.parsedDoc().dynamicMappingsUpdate(); - if (update != null) { - throw new RetryOnPrimaryException(shardId, - "Dynamics mappings are not available on the node that holds the primary yet"); - } - } - final boolean created = indexShard.index(operation); - - // update the version on request so it will happen on the replicas - final long version = operation.version(); - request.version(version); - request.versionType(request.versionType().versionTypeForReplicationAndRecovery()); - - assert request.versionType().validateVersionForWrites(request.version()); - - return new WriteResult(new IndexResponse(shardId.getIndex(), request.type(), request.id(), request.version(), 
created), operation.getTranslogLocation()); - } - - protected final void processAfter(boolean refresh, IndexShard indexShard, Translog.Location location) { + /** utility method for common tasks that should be done after a write operation */ + public static void processAfterWrite(boolean refresh, IndexShard indexShard, Translog.Location location) { if (refresh) { try { indexShard.refresh("refresh_flag_index"); diff --git a/core/src/main/java/org/elasticsearch/action/update/TransportUpdateAction.java b/core/src/main/java/org/elasticsearch/action/update/TransportUpdateAction.java index 2a639c83ad1b4..b3c211f292d56 100644 --- a/core/src/main/java/org/elasticsearch/action/update/TransportUpdateAction.java +++ b/core/src/main/java/org/elasticsearch/action/update/TransportUpdateAction.java @@ -175,7 +175,8 @@ protected void shardOperation(final UpdateRequest request, final ActionListener< indexAction.execute(upsertRequest, new ActionListener() { @Override public void onResponse(IndexResponse response) { - UpdateResponse update = new UpdateResponse(response.getShardInfo(), response.getIndex(), response.getType(), response.getId(), response.getVersion(), response.isCreated()); + UpdateResponse update = new UpdateResponse(response.getShardInfo(), response.getShardId(), response.getType(), + response.getId(), response.getSeqNo(), response.getVersion(), response.isCreated()); if (request.fields() != null && request.fields().length > 0) { Tuple> sourceAndContent = XContentHelper.convertToMap(upsertSourceBytes, true); update.setGetResult(updateHelper.extractGetResult(request, request.concreteIndex(), response.getVersion(), sourceAndContent.v2(), sourceAndContent.v1(), upsertSourceBytes)); @@ -210,7 +211,8 @@ protected void doRun() { indexAction.execute(indexRequest, new ActionListener() { @Override public void onResponse(IndexResponse response) { - UpdateResponse update = new UpdateResponse(response.getShardInfo(), response.getIndex(), response.getType(), response.getId(), 
response.getVersion(), response.isCreated()); + UpdateResponse update = new UpdateResponse(response.getShardInfo(), response.getShardId(), response.getType(), response.getId(), + response.getSeqNo(), response.getVersion(), response.isCreated()); update.setGetResult(updateHelper.extractGetResult(request, request.concreteIndex(), response.getVersion(), result.updatedSourceAsMap(), result.updateSourceContentType(), indexSourceBytes)); listener.onResponse(update); } @@ -238,7 +240,8 @@ protected void doRun() { deleteAction.execute(deleteRequest, new ActionListener() { @Override public void onResponse(DeleteResponse response) { - UpdateResponse update = new UpdateResponse(response.getShardInfo(), response.getIndex(), response.getType(), response.getId(), response.getVersion(), false); + UpdateResponse update = new UpdateResponse(response.getShardInfo(), response.getShardId(), response.getType(), + response.getId(), response.getSeqNo(), response.getVersion(), false); update.setGetResult(updateHelper.extractGetResult(request, request.concreteIndex(), response.getVersion(), result.updatedSourceAsMap(), result.updateSourceContentType(), null)); listener.onResponse(update); } @@ -264,7 +267,7 @@ protected void doRun() { case NONE: UpdateResponse update = result.action(); IndexService indexServiceOrNull = indicesService.indexService(request.concreteIndex()); - if (indexServiceOrNull != null) { + if (indexServiceOrNull != null) { IndexShard shard = indexService.getShardOrNull(request.shardId()); if (shard != null) { shard.indexingService().noopUpdate(request.type()); diff --git a/core/src/main/java/org/elasticsearch/action/update/UpdateHelper.java b/core/src/main/java/org/elasticsearch/action/update/UpdateHelper.java index 010142b0b4c15..247c672b8edbd 100644 --- a/core/src/main/java/org/elasticsearch/action/update/UpdateHelper.java +++ b/core/src/main/java/org/elasticsearch/action/update/UpdateHelper.java @@ -83,9 +83,10 @@ public Result prepare(UpdateRequest request, 
IndexShard indexShard) { @SuppressWarnings("unchecked") protected Result prepare(UpdateRequest request, final GetResult getResult) { long getDateNS = System.nanoTime(); + final ShardId shardId = new ShardId(request.index(), request.shardId()); if (!getResult.isExists()) { if (request.upsertRequest() == null && !request.docAsUpsert()) { - throw new DocumentMissingException(new ShardId(request.index(), request.shardId()), request.type(), request.id()); + throw new DocumentMissingException(shardId, request.type(), request.id()); } IndexRequest indexRequest = request.docAsUpsert() ? request.doc() : request.upsertRequest(); Long ttl = indexRequest.ttl(); @@ -113,7 +114,7 @@ protected Result prepare(UpdateRequest request, final GetResult getResult) { logger.warn("Used upsert operation [{}] for script [{}], doing nothing...", scriptOpChoice, request.script.getScript()); } - UpdateResponse update = new UpdateResponse(getResult.getIndex(), getResult.getType(), getResult.getId(), + UpdateResponse update = new UpdateResponse(shardId, getResult.getType(), getResult.getId(), getResult.getVersion(), false); update.setGetResult(getResult); return new Result(update, Operation.NONE, upsertDoc, XContentType.JSON); @@ -145,7 +146,7 @@ protected Result prepare(UpdateRequest request, final GetResult getResult) { if (getResult.internalSourceRef() == null) { // no source, we can't do nothing, through a failure... 
- throw new DocumentSourceMissingException(new ShardId(request.index(), request.shardId()), request.type(), request.id()); + throw new DocumentSourceMissingException(shardId, request.type(), request.id()); } Tuple> sourceAndContent = XContentHelper.convertToMap(getResult.internalSourceRef(), true); @@ -231,12 +232,12 @@ protected Result prepare(UpdateRequest request, final GetResult getResult) { .consistencyLevel(request.consistencyLevel()); return new Result(deleteRequest, Operation.DELETE, updatedSourceAsMap, updateSourceContentType); } else if ("none".equals(operation)) { - UpdateResponse update = new UpdateResponse(getResult.getIndex(), getResult.getType(), getResult.getId(), getResult.getVersion(), false); + UpdateResponse update = new UpdateResponse(new ShardId(getResult.getIndex(), request.shardId()), getResult.getType(), getResult.getId(), getResult.getVersion(), false); update.setGetResult(extractGetResult(request, request.index(), getResult.getVersion(), updatedSourceAsMap, updateSourceContentType, getResult.internalSourceRef())); return new Result(update, Operation.NONE, updatedSourceAsMap, updateSourceContentType); } else { logger.warn("Used update operation [{}] for script [{}], doing nothing...", operation, request.script.getScript()); - UpdateResponse update = new UpdateResponse(getResult.getIndex(), getResult.getType(), getResult.getId(), getResult.getVersion(), false); + UpdateResponse update = new UpdateResponse(new ShardId(getResult.getIndex(), request.shardId()), getResult.getType(), getResult.getId(), getResult.getVersion(), false); return new Result(update, Operation.NONE, updatedSourceAsMap, updateSourceContentType); } } diff --git a/core/src/main/java/org/elasticsearch/action/update/UpdateResponse.java b/core/src/main/java/org/elasticsearch/action/update/UpdateResponse.java index af6438097c800..34e808e71c4a1 100644 --- a/core/src/main/java/org/elasticsearch/action/update/UpdateResponse.java +++ 
b/core/src/main/java/org/elasticsearch/action/update/UpdateResponse.java @@ -19,21 +19,21 @@ package org.elasticsearch.action.update; -import org.elasticsearch.action.ActionWriteResponse; +import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentBuilderString; import org.elasticsearch.index.get.GetResult; +import org.elasticsearch.index.seqno.SequenceNumbersService; +import org.elasticsearch.index.shard.ShardId; import java.io.IOException; /** */ -public class UpdateResponse extends ActionWriteResponse { +public class UpdateResponse extends DocWriteResponse { - private String index; - private String id; - private String type; - private long version; private boolean created; private GetResult getResult; @@ -44,47 +44,16 @@ public UpdateResponse() { * Constructor to be used when a update didn't translate in a write. * For example: update script with operation set to none */ - public UpdateResponse(String index, String type, String id, long version, boolean created) { - this(new ShardInfo(0, 0), index, type, id, version, created); + public UpdateResponse(ShardId shardId, String type, String id, long version, boolean created) { + this(new ShardInfo(0, 0), shardId, type, id, SequenceNumbersService.UNASSIGNED_SEQ_NO, version, created); } - public UpdateResponse(ShardInfo shardInfo, String index, String type, String id, long version, boolean created) { + public UpdateResponse(ShardInfo shardInfo, ShardId shardId, String type, String id, long seqNo, long version, boolean created) { + super(shardId, type, id, seqNo, version); setShardInfo(shardInfo); - this.index = index; - this.id = id; - this.type = type; - this.version = version; this.created = created; } - /** - * The index the document was indexed into. 
- */ - public String getIndex() { - return this.index; - } - - /** - * The type of the document indexed. - */ - public String getType() { - return this.type; - } - - /** - * The id of the document indexed. - */ - public String getId() { - return this.id; - } - - /** - * Returns the current version of the doc indexed. - */ - public long getVersion() { - return this.version; - } - public void setGetResult(GetResult getResult) { this.getResult = getResult; } @@ -104,10 +73,6 @@ public boolean isCreated() { @Override public void readFrom(StreamInput in) throws IOException { super.readFrom(in); - index = in.readString(); - type = in.readString(); - id = in.readString(); - version = in.readLong(); created = in.readBoolean(); if (in.readBoolean()) { getResult = GetResult.readGetResult(in); @@ -117,10 +82,6 @@ public void readFrom(StreamInput in) throws IOException { @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); - out.writeString(index); - out.writeString(type); - out.writeString(id); - out.writeLong(version); out.writeBoolean(created); if (getResult == null) { out.writeBoolean(false); @@ -129,4 +90,19 @@ public void writeTo(StreamOutput out) throws IOException { getResult.writeTo(out); } } + + static final class Fields { + static final XContentBuilderString GET = new XContentBuilderString("get"); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + super.toXContent(builder, params); + if (getGetResult() != null) { + builder.startObject(Fields.GET); + getGetResult().toXContentEmbedded(builder, params); + builder.endObject(); + } + return builder; + } } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/ShardRouting.java b/core/src/main/java/org/elasticsearch/cluster/routing/ShardRouting.java index d5f9bd602e70e..13a4cc66b8419 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/ShardRouting.java +++ 
b/core/src/main/java/org/elasticsearch/cluster/routing/ShardRouting.java @@ -191,6 +191,15 @@ public boolean relocating() { return state == ShardRoutingState.RELOCATING; } + /** + * Returns true if this shard is a relocation target for another shard (i.e., was created with {@link #buildTargetRelocatingShard()}) + * + */ + public boolean isRelocationTarget() { + return state == ShardRoutingState.INITIALIZING && relocatingNodeId != null; + } + + /** * Returns true iff this shard is assigned to a node ie. not * {@link ShardRoutingState#UNASSIGNED unassigned}. Otherwise false diff --git a/core/src/main/java/org/elasticsearch/index/engine/Engine.java b/core/src/main/java/org/elasticsearch/index/engine/Engine.java index 0504fdfa99694..bccae2e46642a 100644 --- a/core/src/main/java/org/elasticsearch/index/engine/Engine.java +++ b/core/src/main/java/org/elasticsearch/index/engine/Engine.java @@ -45,6 +45,7 @@ import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.index.merge.MergeStats; +import org.elasticsearch.index.seqno.SequenceNumbersService; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.store.Store; import org.elasticsearch.index.translog.Translog; @@ -577,7 +578,8 @@ public interface EventListener { /** * Called when a fatal exception occurred */ - default void onFailedEngine(String reason, @Nullable Throwable t) {} + default void onFailedEngine(String reason, @Nullable Throwable t) { + } } public static class Searcher implements Releasable { @@ -602,7 +604,7 @@ public IndexReader reader() { } public DirectoryReader getDirectoryReader() { - if (reader() instanceof DirectoryReader) { + if (reader() instanceof DirectoryReader) { return (DirectoryReader) reader(); } throw new IllegalStateException("Can't use " + reader().getClass() + " as a directory reader"); @@ -621,14 +623,18 @@ public void close() { public static abstract class Operation { private final Term uid; private
long version; + private long seqNo; private final VersionType versionType; private final Origin origin; private Translog.Location location; private final long startTime; private long endTime; - public Operation(Term uid, long version, VersionType versionType, Origin origin, long startTime) { + public Operation(Term uid, long seqNo, long version, VersionType versionType, Origin origin, long startTime) { this.uid = uid; + assert origin != Origin.PRIMARY || seqNo == SequenceNumbersService.UNASSIGNED_SEQ_NO : "seqNo should not be set when origin is PRIMARY"; + assert origin == Origin.PRIMARY || seqNo >= 0 : "seqNo should be set when origin is not PRIMARY"; + this.seqNo = seqNo; this.version = version; this.versionType = versionType; this.origin = origin; @@ -657,6 +663,14 @@ public void updateVersion(long version) { this.version = version; } + public long seqNo() { + return seqNo; + } + + public void updateSeqNo(long seqNo) { + this.seqNo = seqNo; + } + public void setTranslogLocation(Translog.Location location) { this.location = location; } @@ -692,8 +706,8 @@ public static class Index extends Operation { private final ParsedDocument doc; - public Index(Term uid, ParsedDocument doc, long version, VersionType versionType, Origin origin, long startTime) { - super(uid, version, versionType, origin, startTime); + public Index(Term uid, ParsedDocument doc, long seqNo, long version, VersionType versionType, Origin origin, long startTime) { + super(uid, seqNo, version, versionType, origin, startTime); this.doc = doc; } @@ -702,7 +716,7 @@ public Index(Term uid, ParsedDocument doc) { } public Index(Term uid, ParsedDocument doc, long version) { - this(uid, doc, version, VersionType.INTERNAL, Origin.PRIMARY, System.nanoTime()); + this(uid, doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, version, VersionType.INTERNAL, Origin.PRIMARY, System.nanoTime()); } public ParsedDocument parsedDoc() { @@ -735,6 +749,12 @@ public void updateVersion(long version) { 
this.doc.version().setLongValue(version); } + @Override + public void updateSeqNo(long seqNo) { + super.updateSeqNo(seqNo); + this.doc.seqNo().setLongValue(seqNo); + } + public String parent() { return this.doc.parent(); } @@ -753,19 +773,15 @@ public static class Delete extends Operation { private final String id; private boolean found; - public Delete(String type, String id, Term uid, long version, VersionType versionType, Origin origin, long startTime, boolean found) { - super(uid, version, versionType, origin, startTime); + public Delete(String type, String id, Term uid, long seqNo, long version, VersionType versionType, Origin origin, long startTime, boolean found) { + super(uid, seqNo, version, versionType, origin, startTime); this.type = type; this.id = id; this.found = found; } public Delete(String type, String id, Term uid) { - this(type, id, uid, Versions.MATCH_ANY, VersionType.INTERNAL, Origin.PRIMARY, System.nanoTime(), false); - } - - public Delete(Delete template, VersionType versionType) { - this(template.type(), template.id(), template.uid(), template.version(), versionType, template.origin(), template.startTime(), template.found()); + this(type, id, uid, SequenceNumbersService.UNASSIGNED_SEQ_NO, Versions.MATCH_ANY, VersionType.INTERNAL, Origin.PRIMARY, System.nanoTime(), false); } public String type() { @@ -1060,6 +1076,7 @@ public void onSettingsChanged() { * Returns the timestamp of the last write in nanoseconds. * Note: this time might not be absolutely accurate since the {@link Operation#startTime()} is used which might be * slightly inaccurate. + * * @see System#nanoTime() * @see Operation#startTime() */ @@ -1069,12 +1086,14 @@ public long getLastWriteNanos() { /** * Called for each new opened engine searcher to warm new segments + * * @see EngineConfig#getWarmer() */ public interface Warmer { /** * Called once a new Searcher is opened. 
- * @param searcher the searcer to warm + * + * @param searcher the searcher to warm * @param isTopLevelReader true iff the searcher is build from a top-level reader. * Otherwise the searcher might be build from a leaf reader to warm in isolation */ diff --git a/core/src/main/java/org/elasticsearch/index/engine/EngineConfig.java b/core/src/main/java/org/elasticsearch/index/engine/EngineConfig.java index c5da8e83b3db8..a7e2e9b8854d8 100644 --- a/core/src/main/java/org/elasticsearch/index/engine/EngineConfig.java +++ b/core/src/main/java/org/elasticsearch/index/engine/EngineConfig.java @@ -73,6 +73,7 @@ public final class EngineConfig { private final QueryCache queryCache; private final QueryCachingPolicy queryCachingPolicy; + /** * Index setting for compound file on flush. This setting is realtime updateable. */ @@ -123,7 +124,8 @@ public EngineConfig(ShardId shardId, ThreadPool threadPool, ShardIndexingService this.indexSettings = indexSettings; this.threadPool = threadPool; this.indexingService = indexingService; - this.warmer = warmer == null ? (a,b) -> {} : warmer; + this.warmer = warmer == null ? (a, b) -> { + } : warmer; this.store = store; this.deletionPolicy = deletionPolicy; this.mergePolicy = mergePolicy; @@ -226,9 +228,9 @@ public long getGcDeletesInMillis() { * Returns true iff delete garbage collection in the engine should be enabled. This setting is updateable * in realtime and forces a volatile read. Consumers can safely read this value directly go fetch it's latest value. The default is true *

- * Engine GC deletion if enabled collects deleted documents from in-memory realtime data structures after a certain amount of - * time ({@link #getGcDeletesInMillis()} if enabled. Before deletes are GCed they will cause re-adding the document that was deleted - * to fail. + * Engine GC deletion if enabled collects deleted documents from in-memory realtime data structures after a certain amount of + * time ({@link #getGcDeletesInMillis()} if enabled. Before deletes are GCed they will cause re-adding the document that was deleted + * to fail. *

*/ public boolean isEnableGcDeletes() { @@ -238,7 +240,7 @@ public boolean isEnableGcDeletes() { /** * Returns the {@link Codec} used in the engines {@link org.apache.lucene.index.IndexWriter} *

- * Note: this settings is only read on startup. + * Note: this settings is only read on startup. *

*/ public Codec getCodec() { @@ -259,7 +261,6 @@ public ThreadPool getThreadPool() { * * @see org.elasticsearch.index.indexing.ShardIndexingService#postIndex(Engine.Index) * @see org.elasticsearch.index.indexing.ShardIndexingService#preIndex(Engine.Index) - * */ public ShardIndexingService getIndexingService() { return indexingService; @@ -323,7 +324,9 @@ public IndexSettings getIndexSettings() { /** * Returns the engines shard ID */ - public ShardId getShardId() { return shardId; } + public ShardId getShardId() { + return shardId; + } /** * Returns the analyzer as the default analyzer in the engines {@link org.apache.lucene.index.IndexWriter} @@ -404,6 +407,7 @@ public boolean isCreate() { * should be automatically flushed. This is used to free up transient disk usage of potentially large segments that * are written after the engine became inactive from an indexing perspective. */ - public TimeValue getFlushMergesAfter() { return flushMergesAfter; } - + public TimeValue getFlushMergesAfter() { + return flushMergesAfter; + } } diff --git a/core/src/main/java/org/elasticsearch/index/engine/InternalEngine.java b/core/src/main/java/org/elasticsearch/index/engine/InternalEngine.java index 1404b61b8ec8e..dbb62a735a064 100644 --- a/core/src/main/java/org/elasticsearch/index/engine/InternalEngine.java +++ b/core/src/main/java/org/elasticsearch/index/engine/InternalEngine.java @@ -50,6 +50,7 @@ import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.index.merge.MergeStats; import org.elasticsearch.index.merge.OnGoingMerge; +import org.elasticsearch.index.seqno.SequenceNumbersService; import org.elasticsearch.index.shard.ElasticsearchMergePolicy; import org.elasticsearch.index.shard.MergeSchedulerConfig; import org.elasticsearch.index.shard.ShardId; @@ -102,6 +103,8 @@ public class InternalEngine extends Engine { private final IndexThrottle throttle; + private final SequenceNumbersService seqNoService; + public InternalEngine(EngineConfig engineConfig, boolean 
skipInitialTranslogRecovery) throws EngineException { super(engineConfig); this.versionMap = new LiveVersionMap(); @@ -115,6 +118,7 @@ public InternalEngine(EngineConfig engineConfig, boolean skipInitialTranslogReco this.lastDeleteVersionPruneTimeMSec = engineConfig.getThreadPool().estimatedTimeInMillis(); this.indexingService = engineConfig.getIndexingService(); this.warmer = engineConfig.getWarmer(); + seqNoService = new SequenceNumbersService(shardId, engineConfig.getIndexSettings()); mergeScheduler = scheduler = new EngineMergeScheduler(engineConfig.getShardId(), engineConfig.getIndexSettings(), engineConfig.getMergeSchedulerConfig()); this.dirtyLocks = new Object[Runtime.getRuntime().availableProcessors() * 10]; // we multiply it to have enough... for (int i = 0; i < dirtyLocks.length; i++) { @@ -344,6 +348,10 @@ public boolean index(Index index) { } catch (OutOfMemoryError | IllegalStateException | IOException t) { maybeFailEngine("index", t); throw new IndexFailedEngineException(shardId, index.type(), index.id(), t); + } finally { + if (index.seqNo() != SequenceNumbersService.UNASSIGNED_SEQ_NO) { + seqNoService.markSeqNoAsCompleted(index.seqNo()); + } } checkVersionMapRefresh(); return created; @@ -380,6 +388,9 @@ private boolean innerIndex(Index index) throws IOException { final boolean created; index.updateVersion(updatedVersion); + if (index.origin() == Operation.Origin.PRIMARY) { + index.updateSeqNo(seqNoService.generateSeqNo()); + } if (currentVersion == Versions.NOT_FOUND) { // document does not exists, we can optimize for create @@ -447,6 +458,10 @@ public void delete(Delete delete) throws EngineException { } catch (OutOfMemoryError | IllegalStateException | IOException t) { maybeFailEngine("delete", t); throw new DeleteFailedEngineException(shardId, delete, t); + } finally { + if (delete.seqNo() != SequenceNumbersService.UNASSIGNED_SEQ_NO) { + seqNoService.markSeqNoAsCompleted(delete.seqNo()); + } } maybePruneDeletedTombstones(); @@ -490,6 +505,11 @@ 
private void innerDelete(Delete delete) throws IOException { } } updatedVersion = delete.versionType().updateVersion(currentVersion, expectedVersion); + + if (delete.origin() == Operation.Origin.PRIMARY) { + delete.updateSeqNo(seqNoService.generateSeqNo()); + } + final boolean found; if (currentVersion == Versions.NOT_FOUND) { // doc does not exist and no prior deletes diff --git a/core/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java index 1be873d3e9748..0a888731cf103 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java @@ -35,18 +35,7 @@ import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.index.mapper.internal.AllFieldMapper; -import org.elasticsearch.index.mapper.internal.FieldNamesFieldMapper; -import org.elasticsearch.index.mapper.internal.IdFieldMapper; -import org.elasticsearch.index.mapper.internal.IndexFieldMapper; -import org.elasticsearch.index.mapper.internal.ParentFieldMapper; -import org.elasticsearch.index.mapper.internal.RoutingFieldMapper; -import org.elasticsearch.index.mapper.internal.SourceFieldMapper; -import org.elasticsearch.index.mapper.internal.TTLFieldMapper; -import org.elasticsearch.index.mapper.internal.TimestampFieldMapper; -import org.elasticsearch.index.mapper.internal.TypeFieldMapper; -import org.elasticsearch.index.mapper.internal.UidFieldMapper; -import org.elasticsearch.index.mapper.internal.VersionFieldMapper; +import org.elasticsearch.index.mapper.internal.*; import org.elasticsearch.index.mapper.object.ObjectMapper; import org.elasticsearch.index.mapper.object.RootObjectMapper; import org.elasticsearch.script.ExecutableScript; @@ -107,6 +96,7 @@ public Builder(Settings indexSettings, 
RootObjectMapper.Builder builder, MapperS this.rootMappers.put(TimestampFieldMapper.class, new TimestampFieldMapper(indexSettings, mapperService.fullName(TimestampFieldMapper.NAME))); this.rootMappers.put(TTLFieldMapper.class, new TTLFieldMapper(indexSettings)); this.rootMappers.put(VersionFieldMapper.class, new VersionFieldMapper(indexSettings)); + this.rootMappers.put(SeqNoFieldMapper.class, new SeqNoFieldMapper(indexSettings)); this.rootMappers.put(ParentFieldMapper.class, new ParentFieldMapper(indexSettings, mapperService.fullName(ParentFieldMapper.NAME), /* parent type */builder.name())); // _field_names last so that it can see all other fields this.rootMappers.put(FieldNamesFieldMapper.class, new FieldNamesFieldMapper(indexSettings, mapperService.fullName(FieldNamesFieldMapper.NAME))); diff --git a/core/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/core/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index de4dc387c8891..87dfce192b6e9 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -28,8 +28,6 @@ import org.elasticsearch.common.joda.FormatDateTimeFormatter; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.ReleasableLock; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.mapper.core.DateFieldMapper.DateFieldType; @@ -47,7 +45,6 @@ import java.util.Collections; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; /** A parser for documents, given mappings from a DocumentMapper */ @@ -184,7 +181,7 @@ private ParsedDocument innerParseDocument(SourceToParse source) throws MapperPar update = 
mapping.mappingUpdate(rootDynamicUpdate); } - ParsedDocument doc = new ParsedDocument(context.uid(), context.version(), context.id(), context.type(), source.routing(), source.timestamp(), source.ttl(), context.docs(), + ParsedDocument doc = new ParsedDocument(context.uid(), context.version(), context.seqNo(), context.id(), context.type(), source.routing(), source.timestamp(), source.ttl(), context.docs(), context.source(), update).parent(source.parent()); // reset the context to free up memory context.reset(null, null, null); diff --git a/core/src/main/java/org/elasticsearch/index/mapper/ParseContext.java b/core/src/main/java/org/elasticsearch/index/mapper/ParseContext.java index edf75621c1e3e..b68f7d85dc471 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/ParseContext.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/ParseContext.java @@ -323,6 +323,16 @@ public void version(Field version) { in.version(version); } + @Override + public Field seqNo() { + return in.seqNo(); + } + + @Override + public void seqNo(Field seqNo) { + in.seqNo(seqNo); + } + @Override public AllEntries allEntries() { return in.allEntries(); @@ -386,7 +396,7 @@ public static class InternalParseContext extends ParseContext { private String id; - private Field uid, version; + private Field uid, version, seqNo; private StringBuilder stringBuilder = new StringBuilder(); @@ -564,6 +574,17 @@ public void version(Field version) { this.version = version; } + @Override + public Field seqNo() { + return this.seqNo; + } + + @Override + public void seqNo(Field seqNo) { + this.seqNo = seqNo; + } + + @Override public AllEntries allEntries() { return this.allEntries; @@ -730,6 +751,10 @@ public boolean isWithinMultiFields() { public abstract void version(Field version); + public abstract Field seqNo(); + + public abstract void seqNo(Field seqNo); + public final boolean includeInAll(Boolean includeInAll, FieldMapper mapper) { return includeInAll(includeInAll, 
mapper.fieldType().indexOptions() != IndexOptions.NONE); } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/ParsedDocument.java b/core/src/main/java/org/elasticsearch/index/mapper/ParsedDocument.java index ed8314c6f7de8..ea0b786354d46 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/ParsedDocument.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/ParsedDocument.java @@ -30,7 +30,7 @@ */ public class ParsedDocument { - private final Field uid, version; + private final Field uid, version, seqNo; private final String id; @@ -50,9 +50,10 @@ public class ParsedDocument { private String parent; - public ParsedDocument(Field uid, Field version, String id, String type, String routing, long timestamp, long ttl, List documents, BytesReference source, Mapping dynamicMappingsUpdate) { + public ParsedDocument(Field uid, Field version, Field seqNo, String id, String type, String routing, long timestamp, long ttl, List documents, BytesReference source, Mapping dynamicMappingsUpdate) { this.uid = uid; this.version = version; + this.seqNo = seqNo; this.id = id; this.type = type; this.routing = routing; @@ -71,6 +72,10 @@ public Field version() { return version; } + public Field seqNo() { + return seqNo; + } + public String id() { return this.id; } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/internal/SeqNoFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/internal/SeqNoFieldMapper.java new file mode 100644 index 0000000000000..8f0709e986eef --- /dev/null +++ b/core/src/main/java/org/elasticsearch/index/mapper/internal/SeqNoFieldMapper.java @@ -0,0 +1,151 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.mapper.internal; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.index.DocValuesType; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.fielddata.FieldDataType; +import org.elasticsearch.index.mapper.*; +import org.elasticsearch.index.mapper.ParseContext.Document; +import org.elasticsearch.index.seqno.SequenceNumbersService; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +/** Mapper for the _version field. 
*/ +public class SeqNoFieldMapper extends MetadataFieldMapper { + + public static final String NAME = "_seq_no"; + public static final String CONTENT_TYPE = "_seq_no"; + + public static class Defaults { + + public static final String NAME = SeqNoFieldMapper.NAME; + public static final MappedFieldType FIELD_TYPE = new SeqNoFieldType(); + + static { + FIELD_TYPE.setNames(new MappedFieldType.Names(NAME)); + FIELD_TYPE.setDocValuesType(DocValuesType.NUMERIC); + FIELD_TYPE.setHasDocValues(true); + FIELD_TYPE.freeze(); + } + } + + public static class Builder extends MetadataFieldMapper.Builder { + + public Builder() { + super(Defaults.NAME, Defaults.FIELD_TYPE); + } + + @Override + public SeqNoFieldMapper build(BuilderContext context) { + return new SeqNoFieldMapper(context.indexSettings()); + } + } + + public static class TypeParser implements Mapper.TypeParser { + @Override + public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { + throw new MapperParsingException(NAME + " is not configurable"); + } + } + + static final class SeqNoFieldType extends MappedFieldType { + + public SeqNoFieldType() { + setFieldDataType(new FieldDataType("long")); + } + + protected SeqNoFieldType(SeqNoFieldType ref) { + super(ref); + } + + @Override + public MappedFieldType clone() { + return new SeqNoFieldType(this); + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + @Override + public Long value(Object value) { + if (value == null || (value instanceof Long)) { + return (Long) value; + } else { + return Long.parseLong(value.toString()); + } + } + } + + public SeqNoFieldMapper(Settings indexSettings) { + super(NAME, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE, indexSettings); + } + + @Override + public void preParse(ParseContext context) throws IOException { + super.parse(context); + } + + @Override + protected void parseCreateField(ParseContext context, List fields) throws IOException { + // see 
InternalEngine.updateVersion to see where the real version value is set + final Field seqNo = new NumericDocValuesField(NAME, SequenceNumbersService.UNASSIGNED_SEQ_NO); + context.seqNo(seqNo); + fields.add(seqNo); + } + + @Override + public Mapper parse(ParseContext context) throws IOException { + // _seqno added in preparse + return null; + } + + @Override + public void postParse(ParseContext context) throws IOException { + // In the case of nested docs, let's fill nested docs with seqNo=1 so that Lucene doesn't write a Bitset for documents + // that don't have the field. This is consistent with the default value for efficiency. + for (int i = 1; i < context.docs().size(); i++) { + final Document doc = context.docs().get(i); + doc.add(new NumericDocValuesField(NAME, 1L)); + } + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + return builder; + } + + @Override + public void merge(Mapper mergeWith, MergeResult mergeResult) throws MergeMappingException { + // nothing to do + } +} diff --git a/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java b/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java new file mode 100644 index 0000000000000..46b033622432b --- /dev/null +++ b/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java @@ -0,0 +1,60 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.seqno; + +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.shard.AbstractIndexShardComponent; +import org.elasticsearch.index.shard.ShardId; + +import java.util.concurrent.atomic.AtomicLong; + +/** a very light weight implementation. will be replaced with proper machinery later */ +public class SequenceNumbersService extends AbstractIndexShardComponent { + + public final static long UNASSIGNED_SEQ_NO = -1L; + + AtomicLong seqNoGenerator = new AtomicLong(); + + public SequenceNumbersService(ShardId shardId, IndexSettings indexSettings) { + super(shardId, indexSettings); + } + + /** + * generates a new sequence number. + * Note: you must call {@link #markSeqNoAsCompleted(long)} after the operation for which this seq# was generated + * was completed (whether successfully or with a failure + */ + public long generateSeqNo() { + return seqNoGenerator.getAndIncrement(); + } + + public void markSeqNoAsCompleted(long seqNo) { + // this is temporary to make things semi sane on primary promotion and recovery. 
will be replaced with better machinery + boolean success; + do { + long maxSeqNo = seqNoGenerator.get(); + if (seqNo > maxSeqNo) { + success = seqNoGenerator.compareAndSet(maxSeqNo, seqNo); + } else { + success = true; + } + } while (success == false); + } + +} diff --git a/core/src/main/java/org/elasticsearch/index/shard/IllegalIndexShardStateException.java b/core/src/main/java/org/elasticsearch/index/shard/IllegalIndexShardStateException.java index 31c235e09ecd1..e632c0669f6cb 100644 --- a/core/src/main/java/org/elasticsearch/index/shard/IllegalIndexShardStateException.java +++ b/core/src/main/java/org/elasticsearch/index/shard/IllegalIndexShardStateException.java @@ -20,7 +20,6 @@ package org.elasticsearch.index.shard; import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.rest.RestStatus; @@ -34,12 +33,12 @@ public class IllegalIndexShardStateException extends ElasticsearchException { private final IndexShardState currentState; - public IllegalIndexShardStateException(ShardId shardId, IndexShardState currentState, String msg) { - this(shardId, currentState, msg, null); + public IllegalIndexShardStateException(ShardId shardId, IndexShardState currentState, String msg, Object... args) { + this(shardId, currentState, msg, null, args); } - public IllegalIndexShardStateException(ShardId shardId, IndexShardState currentState, String msg, Throwable ex) { - super("CurrentState[" + currentState + "] " + msg, ex); + public IllegalIndexShardStateException(ShardId shardId, IndexShardState currentState, String msg, Throwable ex, Object... 
args) { + super("CurrentState[" + currentState + "] " + msg, ex, args); setShard(shardId); this.currentState = currentState; } diff --git a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java index 2370ace0464e9..8af89deac9fdb 100644 --- a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -19,10 +19,7 @@ package org.elasticsearch.index.shard; -import org.apache.lucene.index.CheckIndex; -import org.apache.lucene.index.IndexCommit; -import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy; -import org.apache.lucene.index.SnapshotDeletionPolicy; +import org.apache.lucene.index.*; import org.apache.lucene.search.QueryCachingPolicy; import org.apache.lucene.search.UsageTrackingQueryCachingPolicy; import org.apache.lucene.store.AlreadyClosedException; @@ -82,6 +79,7 @@ import org.elasticsearch.index.refresh.RefreshStats; import org.elasticsearch.index.search.stats.SearchStats; import org.elasticsearch.index.search.stats.ShardSearchStats; +import org.elasticsearch.index.seqno.SequenceNumbersService; import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.index.snapshots.IndexShardRepository; import org.elasticsearch.index.store.Store; @@ -194,8 +192,10 @@ public class IndexShard extends AbstractIndexShardComponent { private final IndexSearcherWrapper searcherWrapper; private final TimeValue inactiveTime; - /** True if this shard is still indexing (recently) and false if we've been idle for long enough (as periodically checked by {@link - * IndexingMemoryController}). */ + /** + * True if this shard is still indexing (recently) and false if we've been idle for long enough (as periodically checked by {@link + * IndexingMemoryController}). 
+ */ private final AtomicBoolean active = new AtomicBoolean(); public IndexShard(ShardId shardId, IndexSettings indexSettings, ShardPath path, Store store, IndexCache indexCache, @@ -223,7 +223,7 @@ public IndexShard(ShardId shardId, IndexSettings indexSettings, ShardPath path, this.termVectorsService = provider.getTermVectorsService(); this.searchService = new ShardSearchStats(settings); this.shardWarmerService = new ShardIndexWarmerService(shardId, indexSettings); - this.indicesQueryCache = provider.getIndicesQueryCache(); + this.indicesQueryCache = provider.getIndicesQueryCache(); this.shardQueryCache = new ShardRequestCache(shardId, indexSettings); this.shardFieldData = new ShardFieldData(); this.indexFieldDataService = indexFieldDataService; @@ -233,6 +233,7 @@ public IndexShard(ShardId shardId, IndexSettings indexSettings, ShardPath path, this.flushOnClose = settings.getAsBoolean(INDEX_FLUSH_ON_CLOSE, true); this.path = path; this.mergePolicyConfig = new MergePolicyConfig(logger, settings); + /* create engine config */ logger.debug("state: [CREATED]"); @@ -445,22 +446,38 @@ private IndexShardState changeState(IndexShardState newState, String reason) { return previousState; } - public Engine.Index prepareIndex(SourceToParse source, long version, VersionType versionType, Engine.Operation.Origin origin) { + public Engine.Index prepareIndexOnPrimary(SourceToParse source, long version, VersionType versionType) { try { - return prepareIndex(docMapper(source.type()), source, version, versionType, origin); + if (shardRouting.primary() == false) { + throw new IllegalIndexShardStateException(shardId, state, "shard is not a primary"); + } + return prepareIndex(docMapper(source.type()), source, SequenceNumbersService.UNASSIGNED_SEQ_NO, version, versionType, Engine.Operation.Origin.PRIMARY); } catch (Throwable t) { verifyNotClosed(t); throw t; } } - static Engine.Index prepareIndex(DocumentMapperForType docMapper, SourceToParse source, long version, VersionType 
versionType, Engine.Operation.Origin origin) { + public Engine.Index prepareIndexOnReplica(SourceToParse source, long seqNo, long version, VersionType versionType) { + try { + if (shardRouting.primary() && shardRouting.isRelocationTarget() == false) { + throw new IllegalIndexShardStateException(shardId, state, "shard is not a replica"); + } + return prepareIndex(docMapper(source.type()), source, seqNo, version, versionType, Engine.Operation.Origin.REPLICA); + } catch (Throwable t) { + verifyNotClosed(t); + throw t; + } + } + + static Engine.Index prepareIndex(DocumentMapperForType docMapper, SourceToParse source, long seqNo, long version, VersionType versionType, Engine.Operation.Origin origin) { long startTime = System.nanoTime(); ParsedDocument doc = docMapper.getDocumentMapper().parse(source); if (docMapper.getMapping() != null) { doc.addDynamicMappingsUpdate(docMapper.getMapping()); } - return new Engine.Index(docMapper.getDocumentMapper().uidMapper().term(doc.uid().stringValue()), doc, version, versionType, origin, startTime); + doc.seqNo().setLongValue(seqNo); + return new Engine.Index(docMapper.getDocumentMapper().uidMapper().term(doc.uid().stringValue()), doc, seqNo, version, versionType, origin, startTime); } /** @@ -486,10 +503,27 @@ public boolean index(Engine.Index index) { return created; } - public Engine.Delete prepareDelete(String type, String id, long version, VersionType versionType, Engine.Operation.Origin origin) { - long startTime = System.nanoTime(); + public Engine.Delete prepareDeleteOnPrimary(String type, String id, long version, VersionType versionType) { + if (shardRouting.primary() == false) { + throw new IllegalIndexShardStateException(shardId, state, "shard is not a primary"); + } + final DocumentMapper documentMapper = docMapper(type).getDocumentMapper(); + final Term uid = documentMapper.uidMapper().term(Uid.createUid(type, id)); + return prepareDelete(type, id, uid, SequenceNumbersService.UNASSIGNED_SEQ_NO, version, versionType, 
Engine.Operation.Origin.PRIMARY); + } + + public Engine.Delete prepareDeleteOnReplica(String type, String id, long seqNo, long version, VersionType versionType) { + if (shardRouting.primary()) { + throw new IllegalIndexShardStateException(shardId, state, "shard is not a replica"); + } final DocumentMapper documentMapper = docMapper(type).getDocumentMapper(); - return new Engine.Delete(type, id, documentMapper.uidMapper().term(Uid.createUid(type, id)), version, versionType, origin, startTime, false); + final Term uid = documentMapper.uidMapper().term(Uid.createUid(type, id)); + return prepareDelete(type, id, uid, seqNo, version, versionType, Engine.Operation.Origin.REPLICA); + } + + static Engine.Delete prepareDelete(String type, String id, Term uid, long seqNo, long version, VersionType versionType, Engine.Operation.Origin origin) { + long startTime = System.nanoTime(); + return new Engine.Delete(type, id, uid, seqNo, version, versionType, origin, startTime, false); } public void delete(Engine.Delete delete) { @@ -533,11 +567,8 @@ public FlushStats flushStats() { } public DocsStats docStats() { - final Engine.Searcher searcher = acquireSearcher("doc_stats"); - try { + try (Engine.Searcher searcher = acquireSearcher("doc_stats")) { return new DocsStats(searcher.reader().numDocs(), searcher.reader().numDeletedDocs()); - } finally { - searcher.close(); } } @@ -977,8 +1008,10 @@ public void addShardFailureCallback(Callback onShardFailure) { this.shardEventListener.delegates.add(onShardFailure); } - /** Change the indexing and translog buffer sizes. If {@code IndexWriter} is currently using more than - * the new buffering indexing size then we do a refresh to free up the heap. */ + /** + * Change the indexing and translog buffer sizes. If {@code IndexWriter} is currently using more than + * the new buffering indexing size then we do a refresh to free up the heap. 
+ */ public void updateBufferSize(ByteSizeValue shardIndexingBufferSize, ByteSizeValue shardTranslogBufferSize) { final EngineConfig config = engineConfig; @@ -1021,9 +1054,11 @@ public void updateBufferSize(ByteSizeValue shardIndexingBufferSize, ByteSizeValu engine.getTranslog().updateBuffer(shardTranslogBufferSize); } - /** Called by {@link IndexingMemoryController} to check whether more than {@code inactiveTimeNS} has passed since the last - * indexing operation, and become inactive (reducing indexing and translog buffers to tiny values) if so. This returns true - * if the shard is inactive. */ + /** + * Called by {@link IndexingMemoryController} to check whether more than {@code inactiveTimeNS} has passed since the last + * indexing operation, and become inactive (reducing indexing and translog buffers to tiny values) if so. This returns true + * if the shard is inactive. + */ public boolean checkIdle() { return checkIdle(inactiveTime.nanos()); } @@ -1042,8 +1077,10 @@ final boolean checkIdle(long inactiveTimeNS) { // pkg private for testing return active.get() == false; } - /** Returns {@code true} if this shard is active (has seen indexing ops in the last {@link - * IndexShard#INDEX_SHARD_INACTIVE_TIME_SETTING} (default 5 minutes), else {@code false}. */ + /** + * Returns {@code true} if this shard is active (has seen indexing ops in the last {@link + * IndexShard#INDEX_SHARD_INACTIVE_TIME_SETTING} (default 5 minutes), else {@code false}. 
+ */ public boolean getActive() { return active.get(); } @@ -1077,10 +1114,10 @@ public boolean recoverFromStore(DiscoveryNode localNode) { return storeRecovery.recoverFromStore(this, shouldExist, localNode); } - public boolean restoreFromRepository(IndexShardRepository repository, DiscoveryNode locaNode) { + public boolean restoreFromRepository(IndexShardRepository repository, DiscoveryNode localNode) { assert shardRouting.primary() : "recover from store only makes sense if the shard is a primary shard"; StoreRecovery storeRecovery = new StoreRecovery(shardId, logger); - return storeRecovery.recoverFromRepository(this, repository, locaNode); + return storeRecovery.recoverFromRepository(this, repository, localNode); } /** @@ -1244,35 +1281,32 @@ public void run() { reschedule(); return; } - threadPool.executor(ThreadPool.Names.REFRESH).execute(new Runnable() { - @Override - public void run() { - try { - if (getEngine().refreshNeeded()) { - refresh("schedule"); - } - } catch (EngineClosedException e) { - // we are being closed, ignore - } catch (RefreshFailedEngineException e) { - if (e.getCause() instanceof InterruptedException) { - // ignore, we are being shutdown - } else if (e.getCause() instanceof ClosedByInterruptException) { - // ignore, we are being shutdown - } else if (e.getCause() instanceof ThreadInterruptedException) { - // ignore, we are being shutdown - } else { - if (state != IndexShardState.CLOSED) { - logger.warn("Failed to perform scheduled engine refresh", e); - } - } - } catch (Exception e) { + threadPool.executor(ThreadPool.Names.REFRESH).execute(() -> { + try { + if (getEngine().refreshNeeded()) { + refresh("schedule"); + } + } catch (EngineClosedException e) { + // we are being closed, ignore + } catch (RefreshFailedEngineException e) { + if (e.getCause() instanceof InterruptedException) { + // ignore, we are being shutdown + } else if (e.getCause() instanceof ClosedByInterruptException) { + // ignore, we are being shutdown + } else if 
(e.getCause() instanceof ThreadInterruptedException) { + // ignore, we are being shutdown + } else { if (state != IndexShardState.CLOSED) { logger.warn("Failed to perform scheduled engine refresh", e); } } - - reschedule(); + } catch (Exception e) { + if (state != IndexShardState.CLOSED) { + logger.warn("Failed to perform scheduled engine refresh", e); + } } + + reschedule(); }); } @@ -1369,8 +1403,10 @@ Engine getEngine() { return engine; } - /** NOTE: returns null if engine is not yet started (e.g. recovery phase 1, copying over index files, is still running), or if engine is - * closed. */ + /** + * NOTE: returns null if engine is not yet started (e.g. recovery phase 1, copying over index files, is still running), or if engine is + * closed. + */ protected Engine getEngineOrNull() { return this.currentEngineReference.get(); } @@ -1462,7 +1498,8 @@ protected void operationProcessed() { final Engine.Warmer engineWarmer = (searcher, toLevel) -> warmer.warm(searcher, this, idxSettings, toLevel); return new EngineConfig(shardId, threadPool, indexingService, indexSettings, engineWarmer, store, deletionPolicy, mergePolicyConfig.getMergePolicy(), mergeSchedulerConfig, - mapperService.indexAnalyzer(), similarityService.similarity(mapperService), codecService, shardEventListener, translogRecoveryPerformer, indexCache.query(), cachingPolicy, translogConfig, inactiveTime); + mapperService.indexAnalyzer(), similarityService.similarity(mapperService), codecService, shardEventListener, translogRecoveryPerformer, + indexCache.query(), cachingPolicy, translogConfig, inactiveTime); } private static class IndexShardOperationCounter extends AbstractRefCounted { @@ -1486,7 +1523,14 @@ protected void alreadyClosed() { } } - public void incrementOperationCounter() { + /** + * increments the ongoing operations counter. If the given primary term is lower then the one in {@link #shardRouting} + * an {@link IllegalIndexShardStateException} is thrown. 
+ */ + public void incrementOperationCounter(long opPrimaryTerm) { + if (shardRouting.primaryTerm() > opPrimaryTerm) { + throw new IllegalIndexShardStateException(shardId, state, "operation term [{}] is too old (current [{}])", opPrimaryTerm, shardRouting.primaryTerm()); + } indexShardOperationCounter.incRef(); } @@ -1578,6 +1622,7 @@ public void onAfter() { /** * Simple struct encapsulating a shard failure + * * @see IndexShard#addShardFailureCallback(Callback) */ public static final class ShardFailure { @@ -1604,7 +1649,7 @@ protected QueryShardContext initialValue() { }; private QueryShardContext newQueryShardContext() { - return new QueryShardContext(idxSettings, provider.getClient(), indexCache.bitsetFilterCache(), indexFieldDataService, mapperService, similarityService, provider.getScriptService(), provider.getIndicesQueriesRegistry()); + return new QueryShardContext(idxSettings, provider.getClient(), indexCache.bitsetFilterCache(), indexFieldDataService, mapperService, similarityService, provider.getScriptService(), provider.getIndicesQueriesRegistry()); } /** diff --git a/core/src/main/java/org/elasticsearch/index/shard/TranslogRecoveryPerformer.java b/core/src/main/java/org/elasticsearch/index/shard/TranslogRecoveryPerformer.java index 68c552d4419b6..20a681678cd3a 100644 --- a/core/src/main/java/org/elasticsearch/index/shard/TranslogRecoveryPerformer.java +++ b/core/src/main/java/org/elasticsearch/index/shard/TranslogRecoveryPerformer.java @@ -128,7 +128,7 @@ public void performRecoveryOperation(Engine engine, Translog.Operation operation Translog.Index index = (Translog.Index) operation; Engine.Index engineIndex = IndexShard.prepareIndex(docMapper(index.type()), source(index.source()).type(index.type()).id(index.id()) .routing(index.routing()).parent(index.parent()).timestamp(index.timestamp()).ttl(index.ttl()), - index.version(), index.versionType().versionTypeForReplicationAndRecovery(), Engine.Operation.Origin.RECOVERY); + index.seqNo(), 
index.version(), index.versionType().versionTypeForReplicationAndRecovery(), Engine.Operation.Origin.RECOVERY); maybeAddMappingUpdate(engineIndex.type(), engineIndex.parsedDoc().dynamicMappingsUpdate(), engineIndex.id(), allowMappingUpdates); if (logger.isTraceEnabled()) { logger.trace("[translog] recover [index] op of [{}][{}]", index.type(), index.id()); @@ -141,8 +141,9 @@ public void performRecoveryOperation(Engine engine, Translog.Operation operation if (logger.isTraceEnabled()) { logger.trace("[translog] recover [delete] op of [{}][{}]", uid.type(), uid.id()); } - engine.delete(new Engine.Delete(uid.type(), uid.id(), delete.uid(), delete.version(), - delete.versionType().versionTypeForReplicationAndRecovery(), Engine.Operation.Origin.RECOVERY, System.nanoTime(), false)); + Engine.Delete engineDelete = IndexShard.prepareDelete(uid.type(), uid.id(), delete.uid(), delete.seqNo(), + delete.version(), delete.versionType().versionTypeForReplicationAndRecovery(), Engine.Operation.Origin.RECOVERY); + engine.delete(engineDelete); break; default: throw new IllegalStateException("No operation defined for [" + operation + "]"); diff --git a/core/src/main/java/org/elasticsearch/index/translog/Translog.java b/core/src/main/java/org/elasticsearch/index/translog/Translog.java index f69cac45f7b33..f46d32b3f4d39 100644 --- a/core/src/main/java/org/elasticsearch/index/translog/Translog.java +++ b/core/src/main/java/org/elasticsearch/index/translog/Translog.java @@ -129,11 +129,11 @@ public void handle(View view) { }; - /** * Creates a new Translog instance. This method will create a new transaction log unless the given {@link TranslogConfig} has * a non-null {@link org.elasticsearch.index.translog.Translog.TranslogGeneration}. If the generation is null this method * us destructive and will delete all files in the translog path given. 
+ * * @see TranslogConfig#getTranslogPath() */ public Translog(TranslogConfig config) throws IOException { @@ -141,7 +141,7 @@ public Translog(TranslogConfig config) throws IOException { this.config = config; TranslogGeneration translogGeneration = config.getTranslogGeneration(); - if (translogGeneration == null || translogGeneration.translogUUID == null) { // legacy case + if (translogGeneration == null || translogGeneration.translogUUID == null) { // legacy case translogUUID = Strings.randomBase64UUID(); } else { translogUUID = translogGeneration.translogUUID; @@ -347,7 +347,6 @@ public long sizeInBytes() { } - TranslogWriter createWriter(long fileGeneration) throws IOException { TranslogWriter newFile; try { @@ -508,6 +507,7 @@ static String getCommitCheckpointFileName(long generation) { /** * Ensures that the given location has be synced / written to the underlying storage. + * * @return Returns true iff this call caused an actual sync operation otherwise false */ public boolean ensureSynced(Location location) throws IOException { @@ -749,13 +749,21 @@ public int compareTo(Location o) { @Override public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } Location location = (Location) o; - if (generation != location.generation) return false; - if (translogLocation != location.translogLocation) return false; + if (generation != location.generation) { + return false; + } + if (translogLocation != location.translogLocation) { + return false; + } return size == location.size; } @@ -846,10 +854,11 @@ public Source(BytesReference source, String routing, String parent, long timesta } public static class Index implements Operation { - public static final int SERIALIZATION_FORMAT = 6; + public static final int SERIALIZATION_FORMAT = 7; private String id; private String type; + private long seqNo = 
-1; private long version = Versions.MATCH_ANY; private VersionType versionType = VersionType.INTERNAL; private BytesReference source; @@ -867,6 +876,7 @@ public Index(Engine.Index index) { this.source = index.source(); this.routing = index.routing(); this.parent = index.parent(); + this.seqNo = index.seqNo(); this.version = index.version(); this.timestamp = index.timestamp(); this.ttl = index.ttl(); @@ -877,6 +887,8 @@ public Index(String type, String id, byte[] source) { this.type = type; this.id = id; this.source = new BytesArray(source); + this.seqNo = 0; + this.version = 0; } @Override @@ -917,6 +929,10 @@ public BytesReference source() { return this.source; } + public long seqNo() { + return seqNo; + } + public long version() { return this.version; } @@ -959,6 +975,9 @@ public void readFrom(StreamInput in) throws IOException { if (version >= 6) { this.versionType = VersionType.fromValue(in.readByte()); } + if (version >= 7) { + this.seqNo = in.readVLong(); + } } catch (Exception e) { throw new ElasticsearchException("failed to read [" + type + "][" + id + "]", e); } @@ -988,6 +1007,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(timestamp); out.writeLong(ttl); out.writeByte(versionType.getValue()); + out.writeVLong(seqNo); } @Override @@ -1002,6 +1022,7 @@ public boolean equals(Object o) { Index index = (Index) o; if (version != index.version || + seqNo != index.seqNo || timestamp != index.timestamp || ttl != index.ttl || id.equals(index.id) == false || @@ -1021,6 +1042,7 @@ public boolean equals(Object o) { public int hashCode() { int result = id.hashCode(); result = 31 * result + type.hashCode(); + result = 31 * result + Long.hashCode(seqNo); result = 31 * result + Long.hashCode(version); result = 31 * result + versionType.hashCode(); result = 31 * result + source.hashCode(); @@ -1041,9 +1063,10 @@ public String toString() { } public static class Delete implements Operation { - public static final int SERIALIZATION_FORMAT = 2; + 
public static final int SERIALIZATION_FORMAT = 3; private Term uid; + private long seqNo = -1L; private long version = Versions.MATCH_ANY; private VersionType versionType = VersionType.INTERNAL; @@ -1051,19 +1074,19 @@ public Delete() { } public Delete(Engine.Delete delete) { - this(delete.uid()); - this.version = delete.version(); - this.versionType = delete.versionType(); + this(delete.uid(), delete.seqNo(), delete.version(), delete.versionType()); } + /** utility for testing */ public Delete(Term uid) { - this.uid = uid; + this(uid, 0, 0, VersionType.EXTERNAL); } - public Delete(Term uid, long version, VersionType versionType) { + public Delete(Term uid, long seqNo, long version, VersionType versionType) { this.uid = uid; this.version = version; this.versionType = versionType; + this.seqNo = seqNo; } @Override @@ -1080,6 +1103,10 @@ public Term uid() { return this.uid; } + public long seqNo() { + return seqNo; + } + public long version() { return this.version; } @@ -1089,7 +1116,7 @@ public VersionType versionType() { } @Override - public Source getSource(){ + public Source getSource() { throw new IllegalStateException("trying to read doc source from delete operation"); } @@ -1103,6 +1130,9 @@ public void readFrom(StreamInput in) throws IOException { if (version >= 2) { this.versionType = VersionType.fromValue(in.readByte()); } + if (version >= 3) { + this.seqNo = in.readVLong(); + } assert versionType.validateVersionForWrites(version); } @@ -1114,6 +1144,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(uid.text()); out.writeLong(version); out.writeByte(versionType.getValue()); + out.writeVLong(seqNo); } @Override @@ -1127,7 +1158,7 @@ public boolean equals(Object o) { Delete delete = (Delete) o; - return version == delete.version && + return version == delete.version && seqNo == delete.seqNo && uid.equals(delete.uid) && versionType == delete.versionType; } @@ -1135,6 +1166,7 @@ public boolean equals(Object o) { @Override public 
int hashCode() { int result = uid.hashCode(); + result = 31 * result + Long.hashCode(seqNo); result = 31 * result + Long.hashCode(version); result = 31 * result + versionType.hashCode(); return result; @@ -1198,7 +1230,7 @@ static Translog.Operation readOperation(BufferedChecksumStreamInput in) throws I // to prevent this unfortunately. in.mark(opSize); - in.skip(opSize-4); + in.skip(opSize - 4); verifyChecksum(in); in.reset(); } @@ -1250,7 +1282,7 @@ public static void writeOperationNoSize(BufferedChecksumStreamOutput out, Transl out.writeByte(op.opType().id()); op.writeTo(out); long checksum = out.getChecksum(); - out.writeInt((int)checksum); + out.writeInt((int) checksum); } /** diff --git a/core/src/main/java/org/elasticsearch/rest/action/bulk/RestBulkAction.java b/core/src/main/java/org/elasticsearch/rest/action/bulk/RestBulkAction.java index 9018435271413..cb4d09742840d 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/bulk/RestBulkAction.java +++ b/core/src/main/java/org/elasticsearch/rest/action/bulk/RestBulkAction.java @@ -20,7 +20,7 @@ package org.elasticsearch.rest.action.bulk; import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.action.ActionWriteResponse; +import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.action.WriteConsistencyLevel; import org.elasticsearch.action.bulk.BulkItemResponse; import org.elasticsearch.action.bulk.BulkRequest; @@ -97,49 +97,35 @@ public RestResponse buildResponse(BulkResponse response, XContentBuilder builder for (BulkItemResponse itemResponse : response) { builder.startObject(); builder.startObject(itemResponse.getOpType()); - builder.field(Fields._INDEX, itemResponse.getIndex()); - builder.field(Fields._TYPE, itemResponse.getType()); - builder.field(Fields._ID, itemResponse.getId()); - long version = itemResponse.getVersion(); - if (version != -1) { - builder.field(Fields._VERSION, itemResponse.getVersion()); - } if (itemResponse.isFailed()) { + 
builder.field(Fields._INDEX, itemResponse.getIndex()); + builder.field(Fields._TYPE, itemResponse.getType()); + builder.field(Fields._ID, itemResponse.getId()); builder.field(Fields.STATUS, itemResponse.getFailure().getStatus().getStatus()); builder.startObject(Fields.ERROR); ElasticsearchException.toXContent(builder, request, itemResponse.getFailure().getCause()); builder.endObject(); } else { - ActionWriteResponse.ShardInfo shardInfo = itemResponse.getResponse().getShardInfo(); - shardInfo.toXContent(builder, request); - if (itemResponse.getResponse() instanceof DeleteResponse) { - DeleteResponse deleteResponse = itemResponse.getResponse(); - if (deleteResponse.isFound()) { - builder.field(Fields.STATUS, shardInfo.status().getStatus()); - } else { - builder.field(Fields.STATUS, RestStatus.NOT_FOUND.getStatus()); + final DocWriteResponse docResponse = itemResponse.getResponse(); + docResponse.toXContent(builder, request); + RestStatus status = docResponse.getShardInfo().status(); + if (docResponse instanceof DeleteResponse) { + DeleteResponse deleteResponse = (DeleteResponse) docResponse; + if (deleteResponse.isFound() == false) { + status = RestStatus.NOT_FOUND; } - builder.field(Fields.FOUND, deleteResponse.isFound()); - } else if (itemResponse.getResponse() instanceof IndexResponse) { - IndexResponse indexResponse = itemResponse.getResponse(); + } else if (docResponse instanceof IndexResponse) { + IndexResponse indexResponse = (IndexResponse) docResponse; if (indexResponse.isCreated()) { - builder.field(Fields.STATUS, RestStatus.CREATED.getStatus()); - } else { - builder.field(Fields.STATUS, shardInfo.status().getStatus()); + status = RestStatus.CREATED; } - } else if (itemResponse.getResponse() instanceof UpdateResponse) { - UpdateResponse updateResponse = itemResponse.getResponse(); + } else if (docResponse instanceof UpdateResponse) { + UpdateResponse updateResponse = (UpdateResponse) docResponse; if (updateResponse.isCreated()) { - 
builder.field(Fields.STATUS, RestStatus.CREATED.getStatus()); - } else { - builder.field(Fields.STATUS, shardInfo.status().getStatus()); - } - if (updateResponse.getGetResult() != null) { - builder.startObject(Fields.GET); - updateResponse.getGetResult().toXContentEmbedded(builder, request); - builder.endObject(); + status = RestStatus.CREATED; } } + builder.field(Fields.STATUS, status.getStatus()); } builder.endObject(); builder.endObject(); @@ -161,9 +147,5 @@ static final class Fields { static final XContentBuilderString STATUS = new XContentBuilderString("status"); static final XContentBuilderString ERROR = new XContentBuilderString("error"); static final XContentBuilderString TOOK = new XContentBuilderString("took"); - static final XContentBuilderString _VERSION = new XContentBuilderString("_version"); - static final XContentBuilderString FOUND = new XContentBuilderString("found"); - static final XContentBuilderString GET = new XContentBuilderString("get"); } - } diff --git a/core/src/main/java/org/elasticsearch/rest/action/delete/RestDeleteAction.java b/core/src/main/java/org/elasticsearch/rest/action/delete/RestDeleteAction.java index 209ab686ce5b3..9b740954aaf21 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/delete/RestDeleteAction.java +++ b/core/src/main/java/org/elasticsearch/rest/action/delete/RestDeleteAction.java @@ -19,7 +19,6 @@ package org.elasticsearch.rest.action.delete; -import org.elasticsearch.action.ActionWriteResponse; import org.elasticsearch.action.WriteConsistencyLevel; import org.elasticsearch.action.delete.DeleteRequest; import org.elasticsearch.action.delete.DeleteResponse; @@ -27,7 +26,6 @@ import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentBuilderString; import org.elasticsearch.index.VersionType; import org.elasticsearch.rest.*; import 
org.elasticsearch.rest.action.support.RestActions; @@ -65,15 +63,10 @@ public void handleRequest(final RestRequest request, final RestChannel channel, client.delete(deleteRequest, new RestBuilderListener(channel) { @Override public RestResponse buildResponse(DeleteResponse result, XContentBuilder builder) throws Exception { - ActionWriteResponse.ShardInfo shardInfo = result.getShardInfo(); - builder.startObject().field(Fields.FOUND, result.isFound()) - .field(Fields._INDEX, result.getIndex()) - .field(Fields._TYPE, result.getType()) - .field(Fields._ID, result.getId()) - .field(Fields._VERSION, result.getVersion()) - .value(shardInfo) - .endObject(); - RestStatus status = shardInfo.status(); + builder.startObject(); + result.toXContent(builder, request); + builder.endObject(); + RestStatus status = result.getShardInfo().status(); if (!result.isFound()) { status = NOT_FOUND; } @@ -81,12 +74,4 @@ public RestResponse buildResponse(DeleteResponse result, XContentBuilder builder } }); } - - static final class Fields { - static final XContentBuilderString FOUND = new XContentBuilderString("found"); - static final XContentBuilderString _INDEX = new XContentBuilderString("_index"); - static final XContentBuilderString _TYPE = new XContentBuilderString("_type"); - static final XContentBuilderString _ID = new XContentBuilderString("_id"); - static final XContentBuilderString _VERSION = new XContentBuilderString("_version"); - } } diff --git a/core/src/main/java/org/elasticsearch/rest/action/index/RestIndexAction.java b/core/src/main/java/org/elasticsearch/rest/action/index/RestIndexAction.java index d0d0fe68a13c1..ec673952b7c63 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/index/RestIndexAction.java +++ b/core/src/main/java/org/elasticsearch/rest/action/index/RestIndexAction.java @@ -19,7 +19,6 @@ package org.elasticsearch.rest.action.index; -import org.elasticsearch.action.ActionWriteResponse; import org.elasticsearch.action.WriteConsistencyLevel; import 
org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.index.IndexResponse; @@ -27,7 +26,6 @@ import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentBuilderString; import org.elasticsearch.index.VersionType; import org.elasticsearch.rest.*; import org.elasticsearch.rest.action.support.RestActions; @@ -37,7 +35,8 @@ import static org.elasticsearch.rest.RestRequest.Method.POST; import static org.elasticsearch.rest.RestRequest.Method.PUT; -import static org.elasticsearch.rest.RestStatus.*; +import static org.elasticsearch.rest.RestStatus.BAD_REQUEST; +import static org.elasticsearch.rest.RestStatus.CREATED; /** * @@ -85,7 +84,7 @@ public void handleRequest(final RestRequest request, final RestChannel channel, if (sOpType != null) { try { indexRequest.opType(IndexRequest.OpType.fromString(sOpType)); - } catch (IllegalArgumentException eia){ + } catch (IllegalArgumentException eia) { try { XContentBuilder builder = channel.newErrorBuilder(); channel.sendResponse(new BytesRestResponse(BAD_REQUEST, builder.startObject().field("error", eia.getMessage()).endObject())); @@ -103,15 +102,8 @@ public void handleRequest(final RestRequest request, final RestChannel channel, @Override public RestResponse buildResponse(IndexResponse response, XContentBuilder builder) throws Exception { builder.startObject(); - ActionWriteResponse.ShardInfo shardInfo = response.getShardInfo(); - builder.field(Fields._INDEX, response.getIndex()) - .field(Fields._TYPE, response.getType()) - .field(Fields._ID, response.getId()) - .field(Fields._VERSION, response.getVersion()); - shardInfo.toXContent(builder, request); - builder.field(Fields.CREATED, response.isCreated()); - builder.endObject(); - RestStatus status = shardInfo.status(); + response.toXContent(builder, request); + RestStatus status = response.getShardInfo().status(); 
if (response.isCreated()) { status = CREATED; } @@ -119,13 +111,4 @@ public RestResponse buildResponse(IndexResponse response, XContentBuilder builde } }); } - - static final class Fields { - static final XContentBuilderString _INDEX = new XContentBuilderString("_index"); - static final XContentBuilderString _TYPE = new XContentBuilderString("_type"); - static final XContentBuilderString _ID = new XContentBuilderString("_id"); - static final XContentBuilderString _VERSION = new XContentBuilderString("_version"); - static final XContentBuilderString CREATED = new XContentBuilderString("created"); - } - } diff --git a/core/src/main/java/org/elasticsearch/rest/action/update/RestUpdateAction.java b/core/src/main/java/org/elasticsearch/rest/action/update/RestUpdateAction.java index a23780db62ee5..7a46aadde8113 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/update/RestUpdateAction.java +++ b/core/src/main/java/org/elasticsearch/rest/action/update/RestUpdateAction.java @@ -19,7 +19,6 @@ package org.elasticsearch.rest.action.update; -import org.elasticsearch.action.ActionWriteResponse; import org.elasticsearch.action.WriteConsistencyLevel; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.update.UpdateRequest; @@ -29,15 +28,8 @@ import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentBuilderString; import org.elasticsearch.index.VersionType; -import org.elasticsearch.rest.BaseRestHandler; -import org.elasticsearch.rest.BytesRestResponse; -import org.elasticsearch.rest.RestChannel; -import org.elasticsearch.rest.RestController; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.rest.RestResponse; -import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.rest.*; import org.elasticsearch.rest.action.support.RestActions; import 
org.elasticsearch.rest.action.support.RestBuilderListener; import org.elasticsearch.script.Script; @@ -127,21 +119,9 @@ public void handleRequest(final RestRequest request, final RestChannel channel, @Override public RestResponse buildResponse(UpdateResponse response, XContentBuilder builder) throws Exception { builder.startObject(); - ActionWriteResponse.ShardInfo shardInfo = response.getShardInfo(); - builder.field(Fields._INDEX, response.getIndex()) - .field(Fields._TYPE, response.getType()) - .field(Fields._ID, response.getId()) - .field(Fields._VERSION, response.getVersion()); - - shardInfo.toXContent(builder, request); - if (response.getGetResult() != null) { - builder.startObject(Fields.GET); - response.getGetResult().toXContentEmbedded(builder, request); - builder.endObject(); - } - + response.toXContent(builder, request); builder.endObject(); - RestStatus status = shardInfo.status(); + RestStatus status = response.getShardInfo().status(); if (response.isCreated()) { status = CREATED; } @@ -149,12 +129,4 @@ public RestResponse buildResponse(UpdateResponse response, XContentBuilder build } }); } - - static final class Fields { - static final XContentBuilderString _INDEX = new XContentBuilderString("_index"); - static final XContentBuilderString _TYPE = new XContentBuilderString("_type"); - static final XContentBuilderString _ID = new XContentBuilderString("_id"); - static final XContentBuilderString _VERSION = new XContentBuilderString("_version"); - static final XContentBuilderString GET = new XContentBuilderString("get"); - } } diff --git a/core/src/main/java/org/elasticsearch/transport/BaseTransportResponseHandler.java b/core/src/main/java/org/elasticsearch/transport/BaseTransportResponseHandler.java index df2362dd47bad..e842b571fa856 100644 --- a/core/src/main/java/org/elasticsearch/transport/BaseTransportResponseHandler.java +++ b/core/src/main/java/org/elasticsearch/transport/BaseTransportResponseHandler.java @@ -20,7 +20,7 @@ package 
org.elasticsearch.transport; /** - * A simple based class that always spawns. + * A simple base class that always spawns. */ public abstract class BaseTransportResponseHandler implements TransportResponseHandler { diff --git a/core/src/test/java/org/elasticsearch/action/support/replication/BroadcastReplicationTests.java b/core/src/test/java/org/elasticsearch/action/support/replication/BroadcastReplicationTests.java index a7fc01e9677a6..68f27a16c4fa6 100644 --- a/core/src/test/java/org/elasticsearch/action/support/replication/BroadcastReplicationTests.java +++ b/core/src/test/java/org/elasticsearch/action/support/replication/BroadcastReplicationTests.java @@ -20,8 +20,8 @@ import org.elasticsearch.Version; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.ActionWriteResponse; import org.elasticsearch.action.NoShardAvailableActionException; +import org.elasticsearch.action.ReplicationResponse; import org.elasticsearch.action.UnavailableShardsException; import org.elasticsearch.action.admin.indices.flush.FlushRequest; import org.elasticsearch.action.admin.indices.flush.FlushResponse; @@ -58,12 +58,8 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; -import static org.elasticsearch.action.support.replication.ClusterStateCreationUtils.state; -import static org.elasticsearch.action.support.replication.ClusterStateCreationUtils.stateWithAssignedPrimariesAndOneReplica; -import static org.elasticsearch.action.support.replication.ClusterStateCreationUtils.stateWithNoShard; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.instanceOf; -import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.elasticsearch.action.support.replication.ClusterStateCreationUtils.*; +import static org.hamcrest.Matchers.*; public class BroadcastReplicationTests extends ESTestCase { @@ -101,7 +97,7 @@ public void testNotStartedPrimary() throws InterruptedException, ExecutionExcept 
randomBoolean() ? ShardRoutingState.INITIALIZING : ShardRoutingState.UNASSIGNED, ShardRoutingState.UNASSIGNED)); logger.debug("--> using initial state:\n{}", clusterService.state().prettyPrint()); Future response = (broadcastReplicationAction.execute(new BroadcastRequest().indices(index))); - for (Tuple> shardRequests : broadcastReplicationAction.capturedShardRequests) { + for (Tuple> shardRequests : broadcastReplicationAction.capturedShardRequests) { if (randomBoolean()) { shardRequests.v2().onFailure(new NoShardAvailableActionException(shardRequests.v1())); } else { @@ -120,10 +116,10 @@ public void testStartedPrimary() throws InterruptedException, ExecutionException ShardRoutingState.STARTED)); logger.debug("--> using initial state:\n{}", clusterService.state().prettyPrint()); Future response = (broadcastReplicationAction.execute(new BroadcastRequest().indices(index))); - for (Tuple> shardRequests : broadcastReplicationAction.capturedShardRequests) { - ActionWriteResponse actionWriteResponse = new ActionWriteResponse(); - actionWriteResponse.setShardInfo(new ActionWriteResponse.ShardInfo(1, 1, new ActionWriteResponse.ShardInfo.Failure[0])); - shardRequests.v2().onResponse(actionWriteResponse); + for (Tuple> shardRequests : broadcastReplicationAction.capturedShardRequests) { + ReplicationResponse replicationResponse = new ReplicationResponse(); + replicationResponse.setShardInfo(new ReplicationResponse.ShardInfo(1, 1, new ReplicationResponse.ShardInfo.Failure[0])); + shardRequests.v2().onResponse(replicationResponse); } logger.info("total shards: {}, ", response.get().getTotalShards()); assertBroadcastResponse(1, 1, 0, response.get(), null); @@ -137,20 +133,20 @@ public void testResultCombine() throws InterruptedException, ExecutionException, Future response = (broadcastReplicationAction.execute(new BroadcastRequest().indices(index))); int succeeded = 0; int failed = 0; - for (Tuple> shardRequests : broadcastReplicationAction.capturedShardRequests) { + for 
(Tuple> shardRequests : broadcastReplicationAction.capturedShardRequests) { if (randomBoolean()) { - ActionWriteResponse.ShardInfo.Failure[] failures = new ActionWriteResponse.ShardInfo.Failure[0]; + ReplicationResponse.ShardInfo.Failure[] failures = new ReplicationResponse.ShardInfo.Failure[0]; int shardsSucceeded = randomInt(1) + 1; succeeded += shardsSucceeded; - ActionWriteResponse actionWriteResponse = new ActionWriteResponse(); + ReplicationResponse replicationResponse = new ReplicationResponse(); if (shardsSucceeded == 1 && randomBoolean()) { //sometimes add failure (no failure means shard unavailable) - failures = new ActionWriteResponse.ShardInfo.Failure[1]; - failures[0] = new ActionWriteResponse.ShardInfo.Failure(index, shardRequests.v1().id(), null, new Exception("pretend shard failed"), RestStatus.GATEWAY_TIMEOUT, false); + failures = new ReplicationResponse.ShardInfo.Failure[1]; + failures[0] = new ReplicationResponse.ShardInfo.Failure(index, shardRequests.v1().id(), null, new Exception("pretend shard failed"), RestStatus.GATEWAY_TIMEOUT, false); failed++; } - actionWriteResponse.setShardInfo(new ActionWriteResponse.ShardInfo(2, shardsSucceeded, failures)); - shardRequests.v2().onResponse(actionWriteResponse); + replicationResponse.setShardInfo(new ReplicationResponse.ShardInfo(2, shardsSucceeded, failures)); + shardRequests.v2().onResponse(replicationResponse); } else { // sometimes fail failed += 2; @@ -179,16 +175,16 @@ public void testShardsList() throws InterruptedException, ExecutionException { assertThat(shards.get(0), equalTo(shardId)); } - private class TestBroadcastReplicationAction extends TransportBroadcastReplicationAction { - protected final Set>> capturedShardRequests = ConcurrentCollections.newConcurrentSet(); + private class TestBroadcastReplicationAction extends TransportBroadcastReplicationAction { + protected final Set>> capturedShardRequests = ConcurrentCollections.newConcurrentSet(); public TestBroadcastReplicationAction(Settings 
settings, ThreadPool threadPool, ClusterService clusterService, TransportService transportService, ActionFilters actionFilters, IndexNameExpressionResolver indexNameExpressionResolver, TransportReplicationAction replicatedBroadcastShardAction) { super("test-broadcast-replication-action", BroadcastRequest::new, settings, threadPool, clusterService, transportService, actionFilters, indexNameExpressionResolver, replicatedBroadcastShardAction); } @Override - protected ActionWriteResponse newShardResponse() { - return new ActionWriteResponse(); + protected ReplicationResponse newShardResponse() { + return new ReplicationResponse(); } @Override @@ -202,7 +198,7 @@ protected BroadcastResponse newResponse(int successfulShards, int failedShards, } @Override - protected void shardExecute(BroadcastRequest request, ShardId shardId, ActionListener shardActionListener) { + protected void shardExecute(BroadcastRequest request, ShardId shardId, ActionListener shardActionListener) { capturedShardRequests.add(new Tuple<>(shardId, shardActionListener)); } } diff --git a/core/src/test/java/org/elasticsearch/action/support/replication/ClusterStateCreationUtils.java b/core/src/test/java/org/elasticsearch/action/support/replication/ClusterStateCreationUtils.java index fb7c59f353c66..7a63e9cb8af23 100644 --- a/core/src/test/java/org/elasticsearch/action/support/replication/ClusterStateCreationUtils.java +++ b/core/src/test/java/org/elasticsearch/action/support/replication/ClusterStateCreationUtils.java @@ -27,12 +27,7 @@ import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; -import org.elasticsearch.cluster.routing.IndexRoutingTable; -import org.elasticsearch.cluster.routing.IndexShardRoutingTable; -import org.elasticsearch.cluster.routing.RoutingTable; -import org.elasticsearch.cluster.routing.ShardRoutingState; -import org.elasticsearch.cluster.routing.TestShardRouting; -import 
org.elasticsearch.cluster.routing.UnassignedInfo; +import org.elasticsearch.cluster.routing.*; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.DummyTransportAddress; import org.elasticsearch.index.shard.ShardId; @@ -80,10 +75,11 @@ public static ClusterState state(String index, boolean primaryLocal, ShardRoutin } discoBuilder.localNodeId(newNode(0).id()); discoBuilder.masterNodeId(newNode(1).id()); // we need a non-local master to test shard failures + final int primaryTerm = randomInt(200); IndexMetaData indexMetaData = IndexMetaData.builder(index).settings(Settings.builder() .put(SETTING_VERSION_CREATED, Version.CURRENT) .put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, numberOfReplicas) - .put(SETTING_CREATION_DATE, System.currentTimeMillis())).build(); + .put(SETTING_CREATION_DATE, System.currentTimeMillis())).primaryTerm(0, primaryTerm).build(); RoutingTable.Builder routing = new RoutingTable.Builder(); routing.addAsNew(indexMetaData); @@ -105,7 +101,6 @@ public static ClusterState state(String index, boolean primaryLocal, ShardRoutin } else { unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null); } - final int primaryTerm = randomInt(200); indexShardRoutingBuilder.addShard(TestShardRouting.newShardRouting(index, 0, primaryNode, relocatingNode, null, primaryTerm, true, primaryState, 0, unassignedInfo)); diff --git a/core/src/test/java/org/elasticsearch/action/support/replication/TransportReplicationActionTests.java b/core/src/test/java/org/elasticsearch/action/support/replication/TransportReplicationActionTests.java index 79f3853b0a8d0..47b9beac5e6a9 100644 --- a/core/src/test/java/org/elasticsearch/action/support/replication/TransportReplicationActionTests.java +++ b/core/src/test/java/org/elasticsearch/action/support/replication/TransportReplicationActionTests.java @@ -20,7 +20,7 @@ import org.apache.lucene.index.CorruptIndexException; import 
org.elasticsearch.ElasticsearchException; -import org.elasticsearch.action.ActionWriteResponse; +import org.elasticsearch.action.ReplicationResponse; import org.elasticsearch.action.UnavailableShardsException; import org.elasticsearch.action.WriteConsistencyLevel; import org.elasticsearch.action.support.ActionFilter; @@ -74,11 +74,7 @@ import static org.elasticsearch.action.support.replication.ClusterStateCreationUtils.state; import static org.elasticsearch.action.support.replication.ClusterStateCreationUtils.stateWithStartedPrimary; -import static org.hamcrest.Matchers.arrayWithSize; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.instanceOf; -import static org.hamcrest.Matchers.notNullValue; -import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.*; public class TransportReplicationActionTests extends ESTestCase { @@ -344,6 +340,8 @@ protected void runReplicateTest(IndexShardRoutingTable shardRoutingTable, int as final ShardIterator shardIt = shardRoutingTable.shardsIt(); final ShardId shardId = shardIt.shardId(); final Request request = new Request(); + final long primaryTerm = randomInt(200); + request.primaryTerm(primaryTerm); PlainActionFuture listener = new PlainActionFuture<>(); logger.debug("expecting [{}] assigned replicas, [{}] total shards. 
using state: \n{}", assignedReplicas, totalShards, clusterService.state().prettyPrint()); @@ -401,7 +399,7 @@ action.new ReplicationPhase(shardIt, request, } assertThat(listener.isDone(), equalTo(true)); Response response = listener.get(); - final ActionWriteResponse.ShardInfo shardInfo = response.getShardInfo(); + final ReplicationResponse.ShardInfo shardInfo = response.getShardInfo(); assertThat(shardInfo.getFailed(), equalTo(criticalFailures)); assertThat(shardInfo.getFailures(), arrayWithSize(criticalFailures)); assertThat(shardInfo.getSuccessful(), equalTo(successful)); @@ -413,6 +411,25 @@ action.new ReplicationPhase(shardIt, request, } // all replicas have responded so the counter should be decreased again assertIndexShardCounter(1); + + // assert that nothing in the replica logic changes the primary term of the operation + assertThat(request.primaryTerm(), equalTo(primaryTerm)); + } + + public void testSeqNoIsSetOnPrimary() { + final String index = "test"; + final ShardId shardId = new ShardId(index, 0); + // we use one replica to check the primary term was set on the operation and sent to the replica + clusterService.setState(state(index, true, + ShardRoutingState.STARTED, randomFrom(ShardRoutingState.INITIALIZING, ShardRoutingState.STARTED))); + logger.debug("--> using initial state:\n{}", clusterService.state().prettyPrint()); + Request request = new Request(shardId); + PlainActionFuture listener = new PlainActionFuture<>(); + TransportReplicationAction.PrimaryPhase primaryPhase = action.new PrimaryPhase(request, listener); + primaryPhase.doRun(); + CapturingTransport.CapturedRequest[] requestsToReplicas = transport.capturedRequests(); + assertThat(requestsToReplicas, arrayWithSize(1)); + assertThat(((Request) requestsToReplicas[0].request).primaryTerm(), equalTo(clusterService.state().getMetaData().index(index).primaryTerm(0))); } public void testCounterOnPrimary() throws InterruptedException, ExecutionException, IOException { @@ -586,7 +603,7 @@ public 
void readFrom(StreamInput in) throws IOException { } } - static class Response extends ActionWriteResponse { + static class Response extends ReplicationResponse { } class Action extends TransportReplicationAction { @@ -632,7 +649,7 @@ protected boolean resolveIndex() { } @Override - protected Releasable getIndexShardOperationsCounter(ShardId shardId) { + protected Releasable getIndexShardOperationsCounter(ShardId shardId, long opPrimaryTerm) { return getOrCreateIndexShardOperationsCounter(); } } diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/ShardRoutingTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/ShardRoutingTests.java index c547994b3fdc8..01a35d277fa66 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/ShardRoutingTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/ShardRoutingTests.java @@ -115,12 +115,18 @@ public void testIsSourceTargetRelocation() { ShardRouting initializingShard0 = TestShardRouting.newShardRouting("test", 0, "node1", randomInt(200), randomBoolean(), ShardRoutingState.INITIALIZING, 1); ShardRouting initializingShard1 = TestShardRouting.newShardRouting("test", 1, "node1", randomInt(200), randomBoolean(), ShardRoutingState.INITIALIZING, 1); ShardRouting startedShard0 = new ShardRouting(initializingShard0); + assertFalse(startedShard0.isRelocationTarget()); startedShard0.moveToStarted(); + assertFalse(startedShard0.isRelocationTarget()); ShardRouting startedShard1 = new ShardRouting(initializingShard1); + assertFalse(startedShard1.isRelocationTarget()); startedShard1.moveToStarted(); + assertFalse(startedShard1.isRelocationTarget()); ShardRouting sourceShard0a = new ShardRouting(startedShard0); sourceShard0a.relocate("node2", -1); + assertFalse(sourceShard0a.isRelocationTarget()); ShardRouting targetShard0a = sourceShard0a.buildTargetRelocatingShard(); + assertTrue(targetShard0a.isRelocationTarget()); ShardRouting sourceShard0b = new ShardRouting(startedShard0); 
sourceShard0b.relocate("node2", -1); ShardRouting sourceShard1 = new ShardRouting(startedShard1); diff --git a/core/src/test/java/org/elasticsearch/document/ShardInfoIT.java b/core/src/test/java/org/elasticsearch/document/ShardInfoIT.java index d4907d821285a..64190091a02fa 100644 --- a/core/src/test/java/org/elasticsearch/document/ShardInfoIT.java +++ b/core/src/test/java/org/elasticsearch/document/ShardInfoIT.java @@ -19,7 +19,7 @@ package org.elasticsearch.document; -import org.elasticsearch.action.ActionWriteResponse; +import org.elasticsearch.action.ReplicationResponse; import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; import org.elasticsearch.action.admin.indices.recovery.RecoveryResponse; import org.elasticsearch.action.bulk.BulkItemResponse; @@ -34,10 +34,7 @@ import org.elasticsearch.test.ESIntegTestCase; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.greaterThanOrEqualTo; -import static org.hamcrest.Matchers.not; -import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.*; /** */ @@ -117,11 +114,11 @@ private void prepareIndex(int numberOfPrimaryShards, boolean routingRequired) th } } - private void assertShardInfo(ActionWriteResponse response) { + private void assertShardInfo(ReplicationResponse response) { assertShardInfo(response, numCopies, numNodes); } - private void assertShardInfo(ActionWriteResponse response, int expectedTotal, int expectedSuccessful) { + private void assertShardInfo(ReplicationResponse response, int expectedTotal, int expectedSuccessful) { assertThat(response.getShardInfo().getTotal(), greaterThanOrEqualTo(expectedTotal)); assertThat(response.getShardInfo().getSuccessful(), greaterThanOrEqualTo(expectedSuccessful)); } diff --git a/core/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java 
b/core/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java index 2937461cb8c40..b30a38e228846 100644 --- a/core/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java +++ b/core/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java @@ -68,6 +68,7 @@ import org.elasticsearch.index.mapper.internal.SourceFieldMapper; import org.elasticsearch.index.mapper.internal.UidFieldMapper; import org.elasticsearch.index.mapper.object.RootObjectMapper; +import org.elasticsearch.index.seqno.SequenceNumbersService; import org.elasticsearch.index.shard.*; import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.index.store.DirectoryService; @@ -190,9 +191,10 @@ private Document testDocument() { private ParsedDocument testParsedDocument(String uid, String id, String type, String routing, long timestamp, long ttl, Document document, BytesReference source, Mapping mappingUpdate) { Field uidField = new Field("_uid", uid, UidFieldMapper.Defaults.FIELD_TYPE); Field versionField = new NumericDocValuesField("_version", 0); + Field seqNoField = new NumericDocValuesField("_seq_no", 0); document.add(uidField); document.add(versionField); - return new ParsedDocument(uidField, versionField, id, type, routing, timestamp, ttl, Arrays.asList(document), source, mappingUpdate); + return new ParsedDocument(uidField, versionField, seqNoField, id, type, routing, timestamp, ttl, Arrays.asList(document), source, mappingUpdate); } protected Store createStore() throws IOException { @@ -246,7 +248,8 @@ public EngineConfig config(IndexSettings indexSettings, Store store, Path transl public void onFailedEngine(String reason, @Nullable Throwable t) { // we don't need to notify anybody in this test } - }, new TranslogHandler(shardId.index().getName(), logger), IndexSearcher.getDefaultQueryCache(), IndexSearcher.getDefaultQueryCachingPolicy(), translogConfig, TimeValue.timeValueMinutes(5)); + }, new 
TranslogHandler(shardId.index().getName(), logger), IndexSearcher.getDefaultQueryCache(), IndexSearcher.getDefaultQueryCachingPolicy(), + translogConfig, TimeValue.timeValueMinutes(5)); try { config.setCreate(Lucene.indexExists(store.directory()) == false); } catch (IOException e) { @@ -784,7 +787,9 @@ public void testRenewSyncFlush() throws Exception { engine.flush(); final boolean forceMergeFlushes = randomBoolean(); if (forceMergeFlushes) { - engine.index(new Engine.Index(newUid("3"), doc, Versions.MATCH_ANY, VersionType.INTERNAL, Engine.Operation.Origin.PRIMARY, System.nanoTime() - engine.engineConfig.getFlushMergesAfter().nanos())); + engine.index(new Engine.Index(newUid("3"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, Versions.MATCH_ANY, + VersionType.INTERNAL, Engine.Operation.Origin.PRIMARY, + System.nanoTime() - engine.engineConfig.getFlushMergesAfter().nanos())); } else { engine.index(new Engine.Index(newUid("3"), doc)); } @@ -824,7 +829,7 @@ public void testRenewSyncFlush() throws Exception { } } - public void testSycnedFlushSurvivesEngineRestart() throws IOException { + public void testSyncedFlushSurvivesEngineRestart() throws IOException { final String syncId = randomUnicodeOfCodepointLengthBetween(10, 20); ParsedDocument doc = testParsedDocument("1", "1", "test", null, -1, -1, testDocumentWithTextField(), B_1, null); engine.index(new Engine.Index(newUid("1"), doc)); @@ -843,7 +848,7 @@ public void testSycnedFlushSurvivesEngineRestart() throws IOException { assertEquals(engine.getLastCommittedSegmentInfos().getUserData().get(Engine.SYNC_COMMIT_ID), syncId); } - public void testSycnedFlushVanishesOnReplay() throws IOException { + public void testSyncedFlushVanishesOnReplay() throws IOException { final String syncId = randomUnicodeOfCodepointLengthBetween(10, 20); ParsedDocument doc = testParsedDocument("1", "1", "test", null, -1, -1, testDocumentWithTextField(), B_1, null); engine.index(new Engine.Index(newUid("1"), doc)); @@ -872,8 +877,7 @@ public 
void testVersioningNewCreate() { Engine.Index create = new Engine.Index(newUid("1"), doc, Versions.MATCH_DELETED); engine.index(create); assertThat(create.version(), equalTo(1l)); - - create = new Engine.Index(newUid("1"), doc, create.version(), create.versionType().versionTypeForReplicationAndRecovery(), REPLICA, 0); + create = new Engine.Index(newUid("1"), doc, create.seqNo(), create.version(), create.versionType().versionTypeForReplicationAndRecovery(), REPLICA, 0); replicaEngine.index(create); assertThat(create.version(), equalTo(1l)); } @@ -884,18 +888,18 @@ public void testVersioningNewIndex() { engine.index(index); assertThat(index.version(), equalTo(1l)); - index = new Engine.Index(newUid("1"), doc, index.version(), index.versionType().versionTypeForReplicationAndRecovery(), REPLICA, 0); + index = new Engine.Index(newUid("1"), doc, index.seqNo(), index.version(), index.versionType().versionTypeForReplicationAndRecovery(), REPLICA, 0); replicaEngine.index(index); assertThat(index.version(), equalTo(1l)); } public void testExternalVersioningNewIndex() { ParsedDocument doc = testParsedDocument("1", "1", "test", null, -1, -1, testDocument(), B_1, null); - Engine.Index index = new Engine.Index(newUid("1"), doc, 12, VersionType.EXTERNAL, PRIMARY, 0); + Engine.Index index = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 12, VersionType.EXTERNAL, PRIMARY, 0); engine.index(index); assertThat(index.version(), equalTo(12l)); - index = new Engine.Index(newUid("1"), doc, index.version(), index.versionType().versionTypeForReplicationAndRecovery(), REPLICA, 0); + index = new Engine.Index(newUid("1"), doc, index.seqNo(), index.version(), index.versionType().versionTypeForReplicationAndRecovery(), REPLICA, 0); replicaEngine.index(index); assertThat(index.version(), equalTo(12l)); } @@ -910,7 +914,7 @@ public void testVersioningIndexConflict() { engine.index(index); assertThat(index.version(), equalTo(2l)); - index = new 
Engine.Index(newUid("1"), doc, 1l, VersionType.INTERNAL, Engine.Operation.Origin.PRIMARY, 0); + index = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 1l, VersionType.INTERNAL, Engine.Operation.Origin.PRIMARY, 0); try { engine.index(index); fail(); @@ -919,7 +923,7 @@ public void testVersioningIndexConflict() { } // future versions should not work as well - index = new Engine.Index(newUid("1"), doc, 3l, VersionType.INTERNAL, PRIMARY, 0); + index = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 3l, VersionType.INTERNAL, PRIMARY, 0); try { engine.index(index); fail(); @@ -930,15 +934,15 @@ public void testVersioningIndexConflict() { public void testExternalVersioningIndexConflict() { ParsedDocument doc = testParsedDocument("1", "1", "test", null, -1, -1, testDocument(), B_1, null); - Engine.Index index = new Engine.Index(newUid("1"), doc, 12, VersionType.EXTERNAL, PRIMARY, 0); + Engine.Index index = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 12, VersionType.EXTERNAL, PRIMARY, 0); engine.index(index); assertThat(index.version(), equalTo(12l)); - index = new Engine.Index(newUid("1"), doc, 14, VersionType.EXTERNAL, PRIMARY, 0); + index = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 14, VersionType.EXTERNAL, PRIMARY, 0); engine.index(index); assertThat(index.version(), equalTo(14l)); - index = new Engine.Index(newUid("1"), doc, 13, VersionType.EXTERNAL, PRIMARY, 0); + index = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 13, VersionType.EXTERNAL, PRIMARY, 0); try { engine.index(index); fail(); @@ -959,7 +963,7 @@ public void testVersioningIndexConflictWithFlush() { engine.flush(); - index = new Engine.Index(newUid("1"), doc, 1l, VersionType.INTERNAL, PRIMARY, 0); + index = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 1l, VersionType.INTERNAL, PRIMARY, 0); try { engine.index(index); 
fail(); @@ -968,7 +972,7 @@ public void testVersioningIndexConflictWithFlush() { } // future versions should not work as well - index = new Engine.Index(newUid("1"), doc, 3l, VersionType.INTERNAL, PRIMARY, 0); + index = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 3l, VersionType.INTERNAL, PRIMARY, 0); try { engine.index(index); fail(); @@ -979,17 +983,17 @@ public void testVersioningIndexConflictWithFlush() { public void testExternalVersioningIndexConflictWithFlush() { ParsedDocument doc = testParsedDocument("1", "1", "test", null, -1, -1, testDocument(), B_1, null); - Engine.Index index = new Engine.Index(newUid("1"), doc, 12, VersionType.EXTERNAL, PRIMARY, 0); + Engine.Index index = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 12, VersionType.EXTERNAL, PRIMARY, 0); engine.index(index); assertThat(index.version(), equalTo(12l)); - index = new Engine.Index(newUid("1"), doc, 14, VersionType.EXTERNAL, PRIMARY, 0); + index = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 14, VersionType.EXTERNAL, PRIMARY, 0); engine.index(index); assertThat(index.version(), equalTo(14l)); engine.flush(); - index = new Engine.Index(newUid("1"), doc, 13, VersionType.EXTERNAL, PRIMARY, 0); + index = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 13, VersionType.EXTERNAL, PRIMARY, 0); try { engine.index(index); fail(); @@ -1103,7 +1107,7 @@ public void testVersioningDeleteConflict() { engine.index(index); assertThat(index.version(), equalTo(2l)); - Engine.Delete delete = new Engine.Delete("test", "1", newUid("1"), 1l, VersionType.INTERNAL, PRIMARY, 0, false); + Engine.Delete delete = new Engine.Delete("test", "1", newUid("1"), SequenceNumbersService.UNASSIGNED_SEQ_NO, 1l, VersionType.INTERNAL, PRIMARY, 0, false); try { engine.delete(delete); fail(); @@ -1112,7 +1116,7 @@ public void testVersioningDeleteConflict() { } // future versions should not work as well - 
delete = new Engine.Delete("test", "1", newUid("1"), 3l, VersionType.INTERNAL, PRIMARY, 0, false); + delete = new Engine.Delete("test", "1", newUid("1"), SequenceNumbersService.UNASSIGNED_SEQ_NO, 3l, VersionType.INTERNAL, PRIMARY, 0, false); try { engine.delete(delete); fail(); @@ -1121,12 +1125,12 @@ public void testVersioningDeleteConflict() { } // now actually delete - delete = new Engine.Delete("test", "1", newUid("1"), 2l, VersionType.INTERNAL, PRIMARY, 0, false); + delete = new Engine.Delete("test", "1", newUid("1"), SequenceNumbersService.UNASSIGNED_SEQ_NO, 2l, VersionType.INTERNAL, PRIMARY, 0, false); engine.delete(delete); assertThat(delete.version(), equalTo(3l)); // now check if we can index to a delete doc with version - index = new Engine.Index(newUid("1"), doc, 2l, VersionType.INTERNAL, PRIMARY, 0); + index = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 2l, VersionType.INTERNAL, PRIMARY, 0); try { engine.index(index); fail(); @@ -1134,13 +1138,9 @@ public void testVersioningDeleteConflict() { // all is well } - // we shouldn't be able to create as well - Engine.Index create = new Engine.Index(newUid("1"), doc, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, 0); - try { - engine.index(create); - } catch (VersionConflictEngineException e) { - // all is well - } + // we should be able to create as well + Engine.Index create = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, 0); + engine.index(create); } public void testVersioningDeleteConflictWithFlush() { @@ -1155,7 +1155,7 @@ public void testVersioningDeleteConflictWithFlush() { engine.flush(); - Engine.Delete delete = new Engine.Delete("test", "1", newUid("1"), 1l, VersionType.INTERNAL, PRIMARY, 0, false); + Engine.Delete delete = new Engine.Delete("test", "1", newUid("1"), SequenceNumbersService.UNASSIGNED_SEQ_NO, 1l, VersionType.INTERNAL, PRIMARY, 0, false); try { 
engine.delete(delete); fail(); @@ -1164,7 +1164,7 @@ public void testVersioningDeleteConflictWithFlush() { } // future versions should not work as well - delete = new Engine.Delete("test", "1", newUid("1"), 3l, VersionType.INTERNAL, PRIMARY, 0, false); + delete = new Engine.Delete("test", "1", newUid("1"), SequenceNumbersService.UNASSIGNED_SEQ_NO, 3l, VersionType.INTERNAL, PRIMARY, 0, false); try { engine.delete(delete); fail(); @@ -1175,14 +1175,14 @@ public void testVersioningDeleteConflictWithFlush() { engine.flush(); // now actually delete - delete = new Engine.Delete("test", "1", newUid("1"), 2l, VersionType.INTERNAL, PRIMARY, 0, false); + delete = new Engine.Delete("test", "1", newUid("1"), SequenceNumbersService.UNASSIGNED_SEQ_NO, 2l, VersionType.INTERNAL, PRIMARY, 0, false); engine.delete(delete); assertThat(delete.version(), equalTo(3l)); engine.flush(); // now check if we can index to a delete doc with version - index = new Engine.Index(newUid("1"), doc, 2l, VersionType.INTERNAL, PRIMARY, 0); + index = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 2l, VersionType.INTERNAL, PRIMARY, 0); try { engine.index(index); fail(); @@ -1190,22 +1190,18 @@ public void testVersioningDeleteConflictWithFlush() { // all is well } - // we shouldn't be able to create as well - Engine.Index create = new Engine.Index(newUid("1"), doc, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, 0); - try { - engine.index(create); - } catch (VersionConflictEngineException e) { - // all is well - } + // we should be able to create + Engine.Index create = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, 0); + engine.index(create); } public void testVersioningCreateExistsException() { ParsedDocument doc = testParsedDocument("1", "1", "test", null, -1, -1, testDocument(), B_1, null); - Engine.Index create = new Engine.Index(newUid("1"), doc, Versions.MATCH_DELETED, 
VersionType.INTERNAL, PRIMARY, 0); + Engine.Index create = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, 0); engine.index(create); assertThat(create.version(), equalTo(1l)); - create = new Engine.Index(newUid("1"), doc, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, 0); + create = new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, 0); try { engine.index(create); fail(); @@ -1216,13 +1212,13 @@ public void testVersioningCreateExistsException() { public void testVersioningCreateExistsExceptionWithFlush() { ParsedDocument doc = testParsedDocument("1", "1", "test", null, -1, -1, testDocument(), B_1, null); - Engine.Index create = new Engine.Index(newUid("1"), doc, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, 0); + Engine.Index create = new Engine.Index(newUid("1"), doc, -1, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, 0); engine.index(create); assertThat(create.version(), equalTo(1l)); engine.flush(); - create = new Engine.Index(newUid("1"), doc, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, 0); + create = new Engine.Index(newUid("1"), doc, -1, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, 0); try { engine.index(create); fail(); @@ -1242,12 +1238,12 @@ public void testVersioningReplicaConflict1() { assertThat(index.version(), equalTo(2l)); // apply the second index to the replica, should work fine - index = new Engine.Index(newUid("1"), doc, index.version(), VersionType.INTERNAL.versionTypeForReplicationAndRecovery(), REPLICA, 0); + index = new Engine.Index(newUid("1"), doc, index.seqNo(), index.version(), VersionType.INTERNAL.versionTypeForReplicationAndRecovery(), REPLICA, 0); replicaEngine.index(index); assertThat(index.version(), equalTo(2l)); // now, the old one should not work - index = new Engine.Index(newUid("1"), doc, 1l, 
VersionType.INTERNAL.versionTypeForReplicationAndRecovery(), REPLICA, 0); + index = new Engine.Index(newUid("1"), doc, index.seqNo(), 1l, VersionType.INTERNAL.versionTypeForReplicationAndRecovery(), REPLICA, 0); try { replicaEngine.index(index); fail(); @@ -1257,7 +1253,7 @@ public void testVersioningReplicaConflict1() { // second version on replica should fail as well try { - index = new Engine.Index(newUid("1"), doc, 2l + index = new Engine.Index(newUid("1"), doc, index.seqNo(), 2l , VersionType.INTERNAL.versionTypeForReplicationAndRecovery(), REPLICA, 0); replicaEngine.index(index); assertThat(index.version(), equalTo(2l)); @@ -1273,7 +1269,7 @@ public void testVersioningReplicaConflict2() { assertThat(index.version(), equalTo(1l)); // apply the first index to the replica, should work fine - index = new Engine.Index(newUid("1"), doc, 1l + index = new Engine.Index(newUid("1"), doc, index.seqNo(), 1l , VersionType.INTERNAL.versionTypeForReplicationAndRecovery(), REPLICA, 0); replicaEngine.index(index); assertThat(index.version(), equalTo(1l)); @@ -1289,14 +1285,14 @@ public void testVersioningReplicaConflict2() { assertThat(delete.version(), equalTo(3l)); // apply the delete on the replica (skipping the second index) - delete = new Engine.Delete("test", "1", newUid("1"), 3l + delete = new Engine.Delete("test", "1", newUid("1"), delete.seqNo(), 3l , VersionType.INTERNAL.versionTypeForReplicationAndRecovery(), REPLICA, 0, false); replicaEngine.delete(delete); assertThat(delete.version(), equalTo(3l)); // second time delete with same version should fail try { - delete = new Engine.Delete("test", "1", newUid("1"), 3l + delete = new Engine.Delete("test", "1", newUid("1"), delete.seqNo(), 3l , VersionType.INTERNAL.versionTypeForReplicationAndRecovery(), REPLICA, 0, false); replicaEngine.delete(delete); fail("excepted VersionConflictEngineException to be thrown"); @@ -1306,7 +1302,7 @@ public void testVersioningReplicaConflict2() { // now do the second index on the 
replica, it should fail try { - index = new Engine.Index(newUid("1"), doc, 2l, VersionType.INTERNAL.versionTypeForReplicationAndRecovery(), REPLICA, 0); + index = new Engine.Index(newUid("1"), doc, index.seqNo(), 2l, VersionType.INTERNAL.versionTypeForReplicationAndRecovery(), REPLICA, 0); replicaEngine.index(index); fail("excepted VersionConflictEngineException to be thrown"); } catch (VersionConflictEngineException e) { @@ -1447,10 +1443,10 @@ public void testEnableGcDeletes() throws Exception { document.add(new TextField("value", "test1", Field.Store.YES)); ParsedDocument doc = testParsedDocument("1", "1", "test", null, -1, -1, document, B_2, null); - engine.index(new Engine.Index(newUid("1"), doc, 1, VersionType.EXTERNAL, Engine.Operation.Origin.PRIMARY, System.nanoTime())); + engine.index(new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 1, VersionType.EXTERNAL, Engine.Operation.Origin.PRIMARY, System.nanoTime())); // Delete document we just added: - engine.delete(new Engine.Delete("test", "1", newUid("1"), 10, VersionType.EXTERNAL, Engine.Operation.Origin.PRIMARY, System.nanoTime(), false)); + engine.delete(new Engine.Delete("test", "1", newUid("1"), SequenceNumbersService.UNASSIGNED_SEQ_NO, 10, VersionType.EXTERNAL, Engine.Operation.Origin.PRIMARY, System.nanoTime(), false)); // Get should not find the document Engine.GetResult getResult = engine.get(new Engine.Get(true, newUid("1"))); @@ -1464,7 +1460,7 @@ public void testEnableGcDeletes() throws Exception { } // Delete non-existent document - engine.delete(new Engine.Delete("test", "2", newUid("2"), 10, VersionType.EXTERNAL, Engine.Operation.Origin.PRIMARY, System.nanoTime(), false)); + engine.delete(new Engine.Delete("test", "2", newUid("2"), SequenceNumbersService.UNASSIGNED_SEQ_NO, 10, VersionType.EXTERNAL, Engine.Operation.Origin.PRIMARY, System.nanoTime(), false)); // Get should not find the document (we never indexed uid=2): getResult = engine.get(new Engine.Get(true, 
newUid("2"))); @@ -1472,7 +1468,7 @@ public void testEnableGcDeletes() throws Exception { // Try to index uid=1 with a too-old version, should fail: try { - engine.index(new Engine.Index(newUid("1"), doc, 2, VersionType.EXTERNAL, Engine.Operation.Origin.PRIMARY, System.nanoTime())); + engine.index(new Engine.Index(newUid("1"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 2, VersionType.EXTERNAL, Engine.Operation.Origin.PRIMARY, System.nanoTime())); fail("did not hit expected exception"); } catch (VersionConflictEngineException vcee) { // expected @@ -1484,7 +1480,7 @@ public void testEnableGcDeletes() throws Exception { // Try to index uid=2 with a too-old version, should fail: try { - engine.index(new Engine.Index(newUid("2"), doc, 2, VersionType.EXTERNAL, Engine.Operation.Origin.PRIMARY, System.nanoTime())); + engine.index(new Engine.Index(newUid("2"), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 2, VersionType.EXTERNAL, Engine.Operation.Origin.PRIMARY, System.nanoTime())); fail("did not hit expected exception"); } catch (VersionConflictEngineException vcee) { // expected @@ -1569,7 +1565,7 @@ public void testDeletesAloneCanTriggerRefresh() throws Exception { for (int i = 0; i < 100; i++) { String id = Integer.toString(i); ParsedDocument doc = testParsedDocument(id, id, "test", null, -1, -1, testDocument(), B_1, null); - engine.index(new Engine.Index(newUid(id), doc, 2, VersionType.EXTERNAL, Engine.Operation.Origin.PRIMARY, System.nanoTime())); + engine.index(new Engine.Index(newUid(id), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 2, VersionType.EXTERNAL, Engine.Operation.Origin.PRIMARY, System.nanoTime())); } // Force merge so we know all merges are done before we start deleting: @@ -1580,20 +1576,17 @@ public void testDeletesAloneCanTriggerRefresh() throws Exception { s.close(); for (int i = 0; i < 100; i++) { String id = Integer.toString(i); - engine.delete(new Engine.Delete("test", id, newUid(id), 10, VersionType.EXTERNAL, 
Engine.Operation.Origin.PRIMARY, System.nanoTime(), false)); + engine.delete(new Engine.Delete("test", id, newUid(id), SequenceNumbersService.UNASSIGNED_SEQ_NO, 10, VersionType.EXTERNAL, Engine.Operation.Origin.PRIMARY, System.nanoTime(), false)); } // We must assertBusy because refresh due to version map being full is done in background (REFRESH) thread pool: - assertBusy(new Runnable() { - @Override - public void run() { - Searcher s2 = engine.acquireSearcher("test"); - long version2 = ((DirectoryReader) s2.reader()).getVersion(); - s2.close(); - - // 100 buffered deletes will easily exceed 25% of our 1 KB indexing buffer so it should have forced a refresh: - assertThat(version2, greaterThan(version1)); - } + assertBusy(() -> { + Searcher s2 = engine.acquireSearcher("test"); + long version2 = ((DirectoryReader) s2.reader()).getVersion(); + s2.close(); + + // 100 buffered deletes will easily exceed 25% of our 1 KB indexing buffer so it should have forced a refresh: + assertThat(version2, greaterThan(version1)); }); } } @@ -1622,8 +1615,8 @@ public void testMissingTranslog() throws IOException { public void testTranslogReplayWithFailure() throws IOException { final int numDocs = randomIntBetween(1, 10); for (int i = 0; i < numDocs; i++) { - ParsedDocument doc = testParsedDocument(Integer.toString(i), Integer.toString(i), "test", null, -1, -1, testDocument(), new BytesArray("{}"), null); - Engine.Index firstIndexRequest = new Engine.Index(newUid(Integer.toString(i)), doc, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, System.nanoTime()); + ParsedDocument doc = testParsedDocument(Integer.toString(i), Integer.toString(i), "test", null, SequenceNumbersService.UNASSIGNED_SEQ_NO, -1, testDocument(), new BytesArray("{}"), null); + Engine.Index firstIndexRequest = new Engine.Index(newUid(Integer.toString(i)), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, System.nanoTime()); engine.index(firstIndexRequest); 
assertThat(firstIndexRequest.version(), equalTo(1l)); } @@ -1675,7 +1668,8 @@ public void testSkipTranslogReplay() throws IOException { final int numDocs = randomIntBetween(1, 10); for (int i = 0; i < numDocs; i++) { ParsedDocument doc = testParsedDocument(Integer.toString(i), Integer.toString(i), "test", null, -1, -1, testDocument(), new BytesArray("{}"), null); - Engine.Index firstIndexRequest = new Engine.Index(newUid(Integer.toString(i)), doc, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, System.nanoTime()); + Engine.Index firstIndexRequest = new Engine.Index(newUid(Integer.toString(i)), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, + Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, System.nanoTime()); engine.index(firstIndexRequest); assertThat(firstIndexRequest.version(), equalTo(1l)); } @@ -1770,7 +1764,7 @@ public void testUpgradeOldIndex() throws IOException { final int numExtraDocs = randomIntBetween(1, 10); for (int i = 0; i < numExtraDocs; i++) { ParsedDocument doc = testParsedDocument("extra" + Integer.toString(i), "extra" + Integer.toString(i), "test", null, -1, -1, testDocument(), new BytesArray("{}"), null); - Engine.Index firstIndexRequest = new Engine.Index(newUid(Integer.toString(i)), doc, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, System.nanoTime()); + Engine.Index firstIndexRequest = new Engine.Index(newUid(Integer.toString(i)), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, System.nanoTime()); engine.index(firstIndexRequest); assertThat(firstIndexRequest.version(), equalTo(1l)); } @@ -1799,7 +1793,8 @@ public void testTranslogReplay() throws IOException { final int numDocs = randomIntBetween(1, 10); for (int i = 0; i < numDocs; i++) { ParsedDocument doc = testParsedDocument(Integer.toString(i), Integer.toString(i), "test", null, -1, -1, testDocument(), new BytesArray("{}"), null); - Engine.Index firstIndexRequest = new Engine.Index(newUid(Integer.toString(i)), 
doc, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, System.nanoTime()); + Engine.Index firstIndexRequest = new Engine.Index(newUid(Integer.toString(i)), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, + Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, System.nanoTime()); engine.index(firstIndexRequest); assertThat(firstIndexRequest.version(), equalTo(1l)); } @@ -1848,7 +1843,7 @@ public void testTranslogReplay() throws IOException { int randomId = randomIntBetween(numDocs + 1, numDocs + 10); String uuidValue = "test#" + Integer.toString(randomId); ParsedDocument doc = testParsedDocument(uuidValue, Integer.toString(randomId), "test", null, -1, -1, testDocument(), new BytesArray("{}"), null); - Engine.Index firstIndexRequest = new Engine.Index(newUid(uuidValue), doc, 1, VersionType.EXTERNAL, PRIMARY, System.nanoTime()); + Engine.Index firstIndexRequest = new Engine.Index(newUid(uuidValue), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 1, VersionType.EXTERNAL, PRIMARY, System.nanoTime()); engine.index(firstIndexRequest); assertThat(firstIndexRequest.version(), equalTo(1l)); if (flush) { @@ -1856,7 +1851,7 @@ public void testTranslogReplay() throws IOException { } doc = testParsedDocument(uuidValue, Integer.toString(randomId), "test", null, -1, -1, testDocument(), new BytesArray("{}"), null); - Engine.Index idxRequest = new Engine.Index(newUid(uuidValue), doc, 2, VersionType.EXTERNAL, PRIMARY, System.nanoTime()); + Engine.Index idxRequest = new Engine.Index(newUid(uuidValue), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, 2, VersionType.EXTERNAL, PRIMARY, System.nanoTime()); engine.index(idxRequest); engine.refresh("test"); assertThat(idxRequest.version(), equalTo(2l)); @@ -1922,7 +1917,8 @@ public void testRecoverFromForeignTranslog() throws IOException { final int numDocs = randomIntBetween(1, 10); for (int i = 0; i < numDocs; i++) { ParsedDocument doc = testParsedDocument(Integer.toString(i), Integer.toString(i), "test", null, -1, -1, testDocument(), 
new BytesArray("{}"), null); - Engine.Index firstIndexRequest = new Engine.Index(newUid(Integer.toString(i)), doc, Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, System.nanoTime()); + Engine.Index firstIndexRequest = new Engine.Index(newUid(Integer.toString(i)), doc, SequenceNumbersService.UNASSIGNED_SEQ_NO, + Versions.MATCH_DELETED, VersionType.INTERNAL, PRIMARY, System.nanoTime()); engine.index(firstIndexRequest); assertThat(firstIndexRequest.version(), equalTo(1l)); } diff --git a/core/src/test/java/org/elasticsearch/index/engine/ShadowEngineTests.java b/core/src/test/java/org/elasticsearch/index/engine/ShadowEngineTests.java index 3fe7a540bf8af..78fe08cd79b64 100644 --- a/core/src/test/java/org/elasticsearch/index/engine/ShadowEngineTests.java +++ b/core/src/test/java/org/elasticsearch/index/engine/ShadowEngineTests.java @@ -161,9 +161,10 @@ private ParseContext.Document testDocument() { private ParsedDocument testParsedDocument(String uid, String id, String type, String routing, long timestamp, long ttl, ParseContext.Document document, BytesReference source, Mapping mappingsUpdate) { Field uidField = new Field("_uid", uid, UidFieldMapper.Defaults.FIELD_TYPE); Field versionField = new NumericDocValuesField("_version", 0); + Field seqNoField = new NumericDocValuesField("_seq_no", 0); document.add(uidField); document.add(versionField); - return new ParsedDocument(uidField, versionField, id, type, routing, timestamp, ttl, Arrays.asList(document), source, mappingsUpdate); + return new ParsedDocument(uidField, versionField, seqNoField, id, type, routing, timestamp, ttl, Arrays.asList(document), source, mappingsUpdate); } protected Store createStore(Path p) throws IOException { @@ -220,7 +221,9 @@ public EngineConfig config(IndexSettings indexSettings, Store store, Path transl @Override public void onFailedEngine(String reason, @Nullable Throwable t) { // we don't need to notify anybody in this test - }}, null, IndexSearcher.getDefaultQueryCache(), 
IndexSearcher.getDefaultQueryCachingPolicy(), translogConfig, TimeValue.timeValueMinutes(5)); + } + }, null, IndexSearcher.getDefaultQueryCache(), IndexSearcher.getDefaultQueryCachingPolicy(), translogConfig, TimeValue.timeValueMinutes(5) + ); try { config.setCreate(Lucene.indexExists(store.directory()) == false); } catch (IOException e) { diff --git a/core/src/test/java/org/elasticsearch/index/indexing/IndexingSlowLogTests.java b/core/src/test/java/org/elasticsearch/index/indexing/IndexingSlowLogTests.java index ccbef6837c98a..10dd82a7ae84a 100644 --- a/core/src/test/java/org/elasticsearch/index/indexing/IndexingSlowLogTests.java +++ b/core/src/test/java/org/elasticsearch/index/indexing/IndexingSlowLogTests.java @@ -36,7 +36,8 @@ public class IndexingSlowLogTests extends ESTestCase { public void testSlowLogParsedDocumentPrinterSourceToLog() throws IOException { BytesReference source = JsonXContent.contentBuilder().startObject().field("foo", "bar").endObject().bytes(); - ParsedDocument pd = new ParsedDocument(new StringField("uid", "test:id", Store.YES), new IntField("version", 1, Store.YES), "id", + ParsedDocument pd = new ParsedDocument(new StringField("uid", "test:id", Store.YES), new IntField("version", 1, Store.YES), + new IntField("seqNo", 1, Store.YES), "id", "test", null, 0, -1, null, source, null); // Turning off document logging doesn't log source[] diff --git a/core/src/test/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapperTests.java b/core/src/test/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapperTests.java index 4753f903cbf0c..e5e42f4d0a3c3 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapperTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapperTests.java @@ -88,8 +88,8 @@ public void testInjectIntoDocDuringParsing() throws Exception { .endObject() .endObject() .bytes()); - - assertFieldNames(set("a", "b", "b.c", "_uid", "_type", 
"_version", "_source", "_all"), doc); + + assertFieldNames(set("a", "b", "b.c", "_uid", "_type", "_version", "_seq_no", "_source", "_all"), doc); } public void testExplicitEnabled() throws Exception { @@ -106,7 +106,7 @@ public void testExplicitEnabled() throws Exception { .endObject() .bytes()); - assertFieldNames(set("field", "_uid", "_type", "_version", "_source", "_all"), doc); + assertFieldNames(set("field", "_uid", "_type", "_version", "_seq_no", "_source", "_all"), doc); } public void testDisabled() throws Exception { diff --git a/core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java index 6c4f0d59cae87..73b758b90a05d 100644 --- a/core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java +++ b/core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java @@ -34,6 +34,8 @@ import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags; import org.elasticsearch.action.admin.indices.stats.IndexStats; import org.elasticsearch.action.admin.indices.stats.ShardStats; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.index.TransportIndexAction; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.support.IndicesOptions; import org.elasticsearch.cluster.ClusterInfoService; @@ -41,6 +43,7 @@ import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.InternalClusterInfoService; import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.metadata.SnapshotId; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.*; @@ -101,6 +104,7 @@ import static org.elasticsearch.common.xcontent.ToXContent.EMPTY_PARAMS; import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*; 
+import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; /** @@ -312,7 +316,7 @@ public void testDeleteIndexDecreasesCounter() throws InterruptedException, Execu client().admin().indices().prepareDelete("test").get(); assertThat(indexShard.getOperationsCount(), equalTo(0)); try { - indexShard.incrementOperationCounter(); + indexShard.incrementOperationCounter(indexShard.routingEntry().primaryTerm()); fail("we should not be able to increment anymore"); } catch (IndexShardClosedException e) { // expected @@ -325,11 +329,26 @@ public void testIndexShardCounter() throws InterruptedException, ExecutionExcept IndicesService indicesService = getInstanceFromNode(IndicesService.class); IndexService indexService = indicesService.indexServiceSafe("test"); IndexShard indexShard = indexService.getShardOrNull(0); + final long primaryTerm = indexShard.shardRouting.primaryTerm(); assertEquals(0, indexShard.getOperationsCount()); - indexShard.incrementOperationCounter(); + indexShard.incrementOperationCounter(primaryTerm); assertEquals(1, indexShard.getOperationsCount()); - indexShard.incrementOperationCounter(); + indexShard.incrementOperationCounter(primaryTerm); assertEquals(2, indexShard.getOperationsCount()); + + try { + indexShard.incrementOperationCounter(primaryTerm - 1); + fail("you can not increment the operation counter with an older primary term"); + } catch (IllegalIndexShardStateException e) { + assertThat(e.getMessage(), containsString("operation term")); + assertThat(e.getMessage(), containsString("too old")); + } + + // but you can increment with a newer one.. 
+ indexShard.incrementOperationCounter(primaryTerm + 1 + randomInt(20)); + + + indexShard.decrementOperationCounter(); indexShard.decrementOperationCounter(); indexShard.decrementOperationCounter(); assertEquals(0, indexShard.getOperationsCount()); @@ -580,9 +599,10 @@ public void testShardStats() throws IOException { private ParsedDocument testParsedDocument(String uid, String id, String type, String routing, long timestamp, long ttl, ParseContext.Document document, BytesReference source, Mapping mappingUpdate) { Field uidField = new Field("_uid", uid, UidFieldMapper.Defaults.FIELD_TYPE); Field versionField = new NumericDocValuesField("_version", 0); + Field seqNoField = new NumericDocValuesField("_seq_no", 0); document.add(uidField); document.add(versionField); - return new ParsedDocument(uidField, versionField, id, type, routing, timestamp, ttl, Arrays.asList(document), source, mappingUpdate); + return new ParsedDocument(uidField, versionField, seqNoField, id, type, routing, timestamp, ttl, Arrays.asList(document), source, mappingUpdate); } public void testPreIndex() throws IOException { @@ -771,7 +791,7 @@ public void testRecoverFromStore() throws IOException { assertHitCount(response, 1); } - public void testFailIfIndexNotPresentInRecoverFromStore() throws IOException { + public void testFailIfIndexNotPresentInRecoverFromStore() throws Throwable { createIndex("test"); ensureGreen(); IndicesService indicesService = getInstanceFromNode(IndicesService.class); @@ -821,7 +841,12 @@ public void testFailIfIndexNotPresentInRecoverFromStore() throws IOException { newShard.updateRoutingEntry(routing, true); SearchResponse response = client().prepareSearch().get(); assertHitCount(response, 0); - client().prepareIndex("test", "test", "0").setSource("{}").setRefresh(true).get(); + // we can't issue this request through a client because of the inconsistencies we created with the cluster state + // doing it directly instead + IndexRequest request = 
client().prepareIndex("test", "test", "0").setSource("{}").request(); + request.process(MetaData.builder().put(test.getMetaData(), false).build(), null, false, "test"); + TransportIndexAction.executeIndexRequestOnPrimary(request, newShard, null); + newShard.refresh("test"); assertHitCount(client().prepareSearch().get(), 1); } diff --git a/core/src/test/java/org/elasticsearch/index/translog/TranslogTests.java b/core/src/test/java/org/elasticsearch/index/translog/TranslogTests.java index e76e09a6cfb88..db21fbc384d6b 100644 --- a/core/src/test/java/org/elasticsearch/index/translog/TranslogTests.java +++ b/core/src/test/java/org/elasticsearch/index/translog/TranslogTests.java @@ -302,7 +302,7 @@ public void testStats() throws IOException { assertThat(stats.estimatedNumberOfOperations(), equalTo(0l)); assertThat(stats.getTranslogSizeInBytes(), equalTo(firstOperationPosition)); assertEquals(6, total.estimatedNumberOfOperations()); - assertEquals(431, total.getTranslogSizeInBytes()); + assertEquals(437, total.getTranslogSizeInBytes()); BytesStreamOutput out = new BytesStreamOutput(); total.writeTo(out); @@ -310,10 +310,10 @@ public void testStats() throws IOException { copy.readFrom(StreamInput.wrap(out.bytes())); assertEquals(6, copy.estimatedNumberOfOperations()); - assertEquals(431, copy.getTranslogSizeInBytes()); + assertEquals(437, copy.getTranslogSizeInBytes()); assertEquals("\"translog\"{\n" + " \"operations\" : 6,\n" + - " \"size_in_bytes\" : 431\n" + + " \"size_in_bytes\" : 437\n" + "}", copy.toString().trim()); try { @@ -475,7 +475,7 @@ public void run() { break; case DELETE: op = new Translog.Delete(new Term("_uid", threadId + "_" + opCount), - 1 + randomInt(100000), + opCount, 1 + randomInt(100000), randomFrom(VersionType.values())); break; default: @@ -1127,7 +1127,7 @@ public void testRecoveryUncommittedCorryptedCheckpoint() throws IOException { try (Translog translog = new Translog(config)) { fail("corrupted"); } catch (IllegalStateException ex) { - 
assertEquals(ex.getMessage(), "Checkpoint file translog-2.ckp already exists but has corrupted content expected: Checkpoint{offset=2683, numOps=55, translogFileGeneration= 2} but got: Checkpoint{offset=0, numOps=0, translogFileGeneration= 0}"); + assertEquals(ex.getMessage(), "Checkpoint file translog-2.ckp already exists but has corrupted content expected: Checkpoint{offset=2738, numOps=55, translogFileGeneration= 2} but got: Checkpoint{offset=0, numOps=0, translogFileGeneration= 0}"); } Checkpoint.write(config.getTranslogPath().resolve(Translog.getCommitCheckpointFileName(read.generation)), read, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING); try (Translog translog = new Translog(config)) { diff --git a/core/src/test/java/org/elasticsearch/indices/flush/SyncedFlushSingleNodeTests.java b/core/src/test/java/org/elasticsearch/indices/flush/SyncedFlushSingleNodeTests.java index 1a4bf8fd3f764..f1322d16a782a 100644 --- a/core/src/test/java/org/elasticsearch/indices/flush/SyncedFlushSingleNodeTests.java +++ b/core/src/test/java/org/elasticsearch/indices/flush/SyncedFlushSingleNodeTests.java @@ -110,7 +110,7 @@ public void testSyncFailsIfOperationIsInFlight() throws InterruptedException { SyncedFlushService flushService = getInstanceFromNode(SyncedFlushService.class); final ShardId shardId = shard.shardId(); - shard.incrementOperationCounter(); + shard.incrementOperationCounter(shard.routingEntry().primaryTerm()); try { SyncedFlushUtil.LatchedListener listener = new SyncedFlushUtil.LatchedListener<>(); flushService.attemptSyncedFlush(shardId, listener); diff --git a/core/src/test/java/org/elasticsearch/routing/SimpleRoutingIT.java b/core/src/test/java/org/elasticsearch/routing/SimpleRoutingIT.java index a5b7da7796f86..fa41c9fef2d46 100644 --- a/core/src/test/java/org/elasticsearch/routing/SimpleRoutingIT.java +++ b/core/src/test/java/org/elasticsearch/routing/SimpleRoutingIT.java @@ -253,7 +253,7 @@ public void testRequiredRoutingWithPathMapping() 
throws Exception { assertThat(client().prepareGet(indexOrAlias(), "type1", "1").setRouting("0").execute().actionGet().isExists(), equalTo(true)); } } - + public void testRequiredRoutingWithPathMappingBulk() throws Exception { client().admin().indices().prepareCreate("test") .addAlias(new Alias("alias")) diff --git a/plugins/delete-by-query/src/test/java/org/elasticsearch/action/deletebyquery/TransportDeleteByQueryActionTests.java b/plugins/delete-by-query/src/test/java/org/elasticsearch/action/deletebyquery/TransportDeleteByQueryActionTests.java index 2b708341c7fd9..2a57af862b088 100644 --- a/plugins/delete-by-query/src/test/java/org/elasticsearch/action/deletebyquery/TransportDeleteByQueryActionTests.java +++ b/plugins/delete-by-query/src/test/java/org/elasticsearch/action/deletebyquery/TransportDeleteByQueryActionTests.java @@ -32,6 +32,7 @@ import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.util.concurrent.CountDown; +import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.internal.InternalSearchHit; @@ -225,7 +226,7 @@ public void testOnBulkResponse() { } else { deleted++; } - items[i] = new BulkItemResponse(i, "delete", new DeleteResponse("test", "type", String.valueOf(i), 1, delete)); + items[i] = new BulkItemResponse(i, "delete", new DeleteResponse(new ShardId("test", 0), "type", String.valueOf(i), i, 1, delete)); } else { items[i] = new BulkItemResponse(i, "delete", new BulkItemResponse.Failure("test", "type", String.valueOf(i), new Throwable("item failed"))); failed++; @@ -281,7 +282,7 @@ public void testOnBulkResponseMultipleIndices() { deleted[0] = deleted[0] + 1; deleted[index] = deleted[index] + 1; } - items[i] = new BulkItemResponse(i, "delete", new DeleteResponse("test-" + index, "type", String.valueOf(i), 1, delete)); + items[i] = new BulkItemResponse(i, 
"delete", new DeleteResponse(new ShardId("test-" + index, 0), "type", String.valueOf(i), i, 1, delete)); } else { items[i] = new BulkItemResponse(i, "delete", new BulkItemResponse.Failure("test-" + index, "type", String.valueOf(i), new Throwable("item failed"))); failed[0] = failed[0] + 1; @@ -408,7 +409,7 @@ private void assertNoFailures(TestActionListener listener) { private void assertSearchContextsClosed() { NodesStatsResponse nodesStats = client().admin().cluster().prepareNodesStats().setIndices(true).get(); - for (NodeStats nodeStat : nodesStats.getNodes()){ + for (NodeStats nodeStat : nodesStats.getNodes()) { assertThat(nodeStat.getIndices().getSearch().getOpenContexts(), equalTo(0L)); } } From bc8785539f3bb1df05a9a23cced2f7735eb3767a Mon Sep 17 00:00:00 2001 From: Boaz Leskes Date: Thu, 19 Nov 2015 22:48:42 +0100 Subject: [PATCH 03/12] trace logging --- .../org/elasticsearch/recovery/RelocationIT.java | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/core/src/test/java/org/elasticsearch/recovery/RelocationIT.java b/core/src/test/java/org/elasticsearch/recovery/RelocationIT.java index 57b5e888ea92f..1765df27c11a6 100644 --- a/core/src/test/java/org/elasticsearch/recovery/RelocationIT.java +++ b/core/src/test/java/org/elasticsearch/recovery/RelocationIT.java @@ -21,7 +21,6 @@ import com.carrotsearch.hppc.IntHashSet; import com.carrotsearch.hppc.procedures.IntProcedure; - import org.apache.lucene.index.IndexFileNames; import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; import org.elasticsearch.action.index.IndexRequestBuilder; @@ -56,11 +55,7 @@ import org.elasticsearch.test.MockIndexEventListener; import org.elasticsearch.test.junit.annotations.TestLogging; import org.elasticsearch.test.transport.MockTransportService; -import org.elasticsearch.transport.Transport; -import org.elasticsearch.transport.TransportException; -import org.elasticsearch.transport.TransportRequest; -import 
org.elasticsearch.transport.TransportRequestOptions; -import org.elasticsearch.transport.TransportService; +import org.elasticsearch.transport.*; import java.io.IOException; import java.nio.file.FileVisitResult; @@ -79,9 +74,7 @@ import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.not; -import static org.hamcrest.Matchers.startsWith; +import static org.hamcrest.Matchers.*; /** */ @@ -144,6 +137,7 @@ public void testSimpleRelocationNoIndexing() { assertThat(client().prepareSearch("test").setSize(0).execute().actionGet().getHits().totalHits(), equalTo(20l)); } + @TestLogging("action.index:TRACE,action.bulk:TRACE,action.search:TRACE") public void testRelocationWhileIndexingRandom() throws Exception { int numberOfRelocations = scaledRandomIntBetween(1, rarely() ? 10 : 4); int numberOfReplicas = randomBoolean() ? 
0 : 1; From 2e1e430961e015742f516f156781c1cea8675448 Mon Sep 17 00:00:00 2001 From: Boaz Leskes Date: Fri, 20 Nov 2015 10:04:53 +0100 Subject: [PATCH 04/12] Disable RecoveryWhileUnderLoadIT for now --- .../elasticsearch/recovery/RecoveryWhileUnderLoadIT.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java b/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java index 8c0c7347a157a..5caac63d62872 100644 --- a/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java +++ b/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java @@ -19,6 +19,7 @@ package org.elasticsearch.recovery; +import org.apache.lucene.util.LuceneTestCase; import org.elasticsearch.action.admin.indices.refresh.RefreshResponse; import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; import org.elasticsearch.action.admin.indices.stats.ShardStats; @@ -40,11 +41,9 @@ import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; import static org.elasticsearch.common.settings.Settings.settingsBuilder; import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAllSuccessful; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoTimeout; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*; +@LuceneTestCase.AwaitsFix(bugUrl = "boaz looking into this") public class RecoveryWhileUnderLoadIT extends ESIntegTestCase { private final ESLogger logger = Loggers.getLogger(RecoveryWhileUnderLoadIT.class); From 8e1ef30bf02d8948b45c081bdf10495349808794 Mon Sep 17 00:00:00 2001 From: Boaz Leskes Date: Mon, 23 Nov 
2015 11:50:07 +0100 Subject: [PATCH 05/12] re enable RecoveryWhileUnderLoadIT now that #14918 is merged. --- .../org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java b/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java index 5caac63d62872..027a0f86d2811 100644 --- a/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java +++ b/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java @@ -19,7 +19,6 @@ package org.elasticsearch.recovery; -import org.apache.lucene.util.LuceneTestCase; import org.elasticsearch.action.admin.indices.refresh.RefreshResponse; import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; import org.elasticsearch.action.admin.indices.stats.ShardStats; @@ -43,7 +42,6 @@ import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*; -@LuceneTestCase.AwaitsFix(bugUrl = "boaz looking into this") public class RecoveryWhileUnderLoadIT extends ESIntegTestCase { private final ESLogger logger = Loggers.getLogger(RecoveryWhileUnderLoadIT.class); From 3d061a8e6259006b0a849b0a7f059783af25b9f6 Mon Sep 17 00:00:00 2001 From: Boaz Leskes Date: Mon, 23 Nov 2015 21:57:07 +0100 Subject: [PATCH 06/12] Introduce a relocated state during primary relocation * this is a temporary fix until a more permanent fix is done on master * During primary relocation, some operation can be done on the source primary but reach the target primary only after the relocation is completed. At the moment the new primary will have a new primary term and as such it will reject the operations from the old one, causing data loss. This changes relocations to move the source primary to a relocated state, prevent any new operations from happening on it and waits for ongoing operations to complete. 
Long term we may also consider not incrementing the primary term on relocation. --- .../TransportReplicationAction.java | 7 ++--- .../service/InternalClusterService.java | 34 ++++--------------- .../elasticsearch/index/shard/IndexShard.java | 17 ++++++++-- .../recovery/RecoverySourceHandler.java | 14 +++++--- .../indices/IndicesLifecycleListenerIT.java | 8 ++--- .../recovery/RecoveryWhileUnderLoadIT.java | 24 +++++++------ 6 files changed, 50 insertions(+), 54 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/action/support/replication/TransportReplicationAction.java b/core/src/main/java/org/elasticsearch/action/support/replication/TransportReplicationAction.java index b4aaa72df15ec..3ef25ffc29951 100644 --- a/core/src/main/java/org/elasticsearch/action/support/replication/TransportReplicationAction.java +++ b/core/src/main/java/org/elasticsearch/action/support/replication/TransportReplicationAction.java @@ -832,8 +832,8 @@ public void onFailure(Throwable t) { @Override protected void doRun() { if (logger.isTraceEnabled()) { - logger.trace("replication phase started. pending [{}], action [{}], request [{}], cluster state version used [{}]", pending.get(), - actionName, replicaRequest, observer.observedState().version()); + logger.trace("replication phase started. 
pending [{}], action [{}], request [{}], cluster state version used [{}], primary on start", pending.get(), + actionName, replicaRequest, observer.observedState().version(), originalPrimaryShard); } if (pending.get() == 0) { doFinish(); @@ -961,6 +961,9 @@ private void forceFinishAsFailed(Throwable t) { private void doFinish() { if (finished.compareAndSet(false, true)) { + if (logger.isTraceEnabled()) { + logger.trace("finished replicating action [{}], request [{}], cluster state version [{}], primary on start {}", actionName, replicaRequest, observer.observedState().version(), originalPrimaryShard); + } Releasables.close(indexShardReference); final ShardId shardId = shardIt.shardId(); final ReplicationResponse.ShardInfo.Failure[] failuresArray; diff --git a/core/src/main/java/org/elasticsearch/cluster/service/InternalClusterService.java b/core/src/main/java/org/elasticsearch/cluster/service/InternalClusterService.java index c2300739a7dca..8fb8dc185b806 100644 --- a/core/src/main/java/org/elasticsearch/cluster/service/InternalClusterService.java +++ b/core/src/main/java/org/elasticsearch/cluster/service/InternalClusterService.java @@ -20,16 +20,8 @@ package org.elasticsearch.cluster.service; import org.elasticsearch.Version; -import org.elasticsearch.cluster.AckedClusterStateUpdateTask; -import org.elasticsearch.cluster.ClusterChangedEvent; -import org.elasticsearch.cluster.ClusterName; -import org.elasticsearch.cluster.ClusterService; -import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.*; import org.elasticsearch.cluster.ClusterState.Builder; -import org.elasticsearch.cluster.ClusterStateListener; -import org.elasticsearch.cluster.ClusterStateUpdateTask; -import org.elasticsearch.cluster.LocalNodeMasterListener; -import org.elasticsearch.cluster.TimeoutClusterStateListener; import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.block.ClusterBlocks; import org.elasticsearch.cluster.metadata.MetaData; @@ 
-49,13 +41,7 @@ import org.elasticsearch.common.text.StringText; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.common.util.concurrent.ConcurrentCollections; -import org.elasticsearch.common.util.concurrent.CountDown; -import org.elasticsearch.common.util.concurrent.EsExecutors; -import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; -import org.elasticsearch.common.util.concurrent.FutureUtils; -import org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor; -import org.elasticsearch.common.util.concurrent.PrioritizedRunnable; +import org.elasticsearch.common.util.concurrent.*; import org.elasticsearch.common.util.iterable.Iterables; import org.elasticsearch.discovery.Discovery; import org.elasticsearch.discovery.DiscoveryService; @@ -63,18 +49,8 @@ import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Queue; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.Executor; -import java.util.concurrent.Future; -import java.util.concurrent.ScheduledFuture; -import java.util.concurrent.TimeUnit; +import java.util.*; +import java.util.concurrent.*; import static org.elasticsearch.common.util.concurrent.EsExecutors.daemonThreadFactory; @@ -514,6 +490,7 @@ public void run() { logger.debug("set local cluster state to version {}", newClusterState.version()); for (ClusterStateListener listener : preAppliedListeners) { try { + logger.trace("calling [{}] with change to version [{}]", listener, newClusterState.version()); listener.clusterChanged(clusterChangedEvent); } catch (Exception ex) { logger.warn("failed to notify ClusterStateListener", ex); @@ -532,6 +509,7 @@ public void run() { 
for (ClusterStateListener listener : postAppliedListeners) { try { + logger.trace("calling [{}] with change to version [{}]", listener, newClusterState.version()); listener.clusterChanged(clusterChangedEvent); } catch (Exception ex) { logger.warn("failed to notify ClusterStateListener", ex); diff --git a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java index 8af89deac9fdb..f458ac8049e9b 100644 --- a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -417,13 +417,22 @@ public IndexShardState markAsRecovering(String reason, RecoveryState recoverySta } } - public IndexShard relocated(String reason) throws IndexShardNotStartedException { + public IndexShard relocated(String reason) throws IndexShardNotStartedException, InterruptedException { synchronized (mutex) { if (state != IndexShardState.STARTED) { throw new IndexShardNotStartedException(shardId, state); } changeState(IndexShardState.RELOCATED, reason); + // nocommit: awful hack to work around delay replications being rejected by the primary term check. This is used to make sure all in flight operation are done + // before primary relocation is done. proper fix coming. + indexShardOperationCounter.decRef(); } + + logger.info("waiting for op count to reach 0"); + while (indexShardOperationCounter.refCount() > 0) { + Thread.sleep(100); + } + logger.info("{} waiting for op count reached 0. continuing..."); return this; } @@ -779,8 +788,12 @@ public void close(String reason, boolean flushEngine) throws IOException { FutureUtils.cancel(mergeScheduleFuture); mergeScheduleFuture = null; } + // nocommit: done to temporary prevent operations on a relocated primary. Remove when properly fixed. 
+ final boolean decOpCounter = state != IndexShardState.RELOCATED; changeState(IndexShardState.CLOSED, reason); - indexShardOperationCounter.decRef(); + if (decOpCounter) { + indexShardOperationCounter.decRef(); + } } finally { final Engine engine = this.currentEngineReference.getAndSet(null); try { diff --git a/core/src/main/java/org/elasticsearch/indices/recovery/RecoverySourceHandler.java b/core/src/main/java/org/elasticsearch/indices/recovery/RecoverySourceHandler.java index 73193161d12dd..94713bc031d9c 100644 --- a/core/src/main/java/org/elasticsearch/indices/recovery/RecoverySourceHandler.java +++ b/core/src/main/java/org/elasticsearch/indices/recovery/RecoverySourceHandler.java @@ -40,7 +40,10 @@ import org.elasticsearch.common.util.CancellableThreads; import org.elasticsearch.common.util.CancellableThreads.Interruptable; import org.elasticsearch.index.engine.RecoveryEngineException; -import org.elasticsearch.index.shard.*; +import org.elasticsearch.index.shard.IllegalIndexShardStateException; +import org.elasticsearch.index.shard.IndexShard; +import org.elasticsearch.index.shard.IndexShardClosedException; +import org.elasticsearch.index.shard.IndexShardState; import org.elasticsearch.index.store.Store; import org.elasticsearch.index.store.StoreFileMetaData; import org.elasticsearch.index.translog.Translog; @@ -218,7 +221,7 @@ public void phase1(final IndexCommit snapshot, final Translog.View translogView) totalSize += md.length(); } List phase1Files = new ArrayList<>(diff.different.size() + diff.missing.size()); - phase1Files.addAll(diff.different); + phase1Files.addAll(diff.different); phase1Files.addAll(diff.missing); for (StoreFileMetaData md : phase1Files) { if (request.metadataSnapshot().asMap().containsKey(md.name())) { @@ -319,7 +322,6 @@ public int compare(StoreFileMetaData o1, StoreFileMetaData o2) { } - protected void prepareTargetForTranslog(final Translog.View translogView) { StopWatch stopWatch = new StopWatch().start(); logger.trace("{} 
recovery [phase1] to {}: prepare remote engine for translog", request.shardId(), request.targetNode()); @@ -395,15 +397,17 @@ public void run() throws InterruptedException { } }); - - if (request.markAsRelocated()) { + if (request.markAsRelocated() || request.recoveryType() == RecoveryState.Type.RELOCATION) { // TODO what happens if the recovery process fails afterwards, we need to mark this back to started try { + // nocommit: awful hack to work around delay replications being rejected by the primary term check. proper fix coming. shard.relocated("to " + request.targetNode()); } catch (IllegalIndexShardStateException e) { // we can ignore this exception since, on the other node, when it moved to phase3 // it will also send shard started, which might cause the index shard we work against // to move be closed by the time we get to the the relocated method + } catch (InterruptedException e) { + throw new ElasticsearchException("interrupted while waiting for pending operation to finish on relocated primary", e); } } stopWatch.stop(); diff --git a/core/src/test/java/org/elasticsearch/indices/IndicesLifecycleListenerIT.java b/core/src/test/java/org/elasticsearch/indices/IndicesLifecycleListenerIT.java index 8de3af258270e..5c08c2d60cca0 100644 --- a/core/src/test/java/org/elasticsearch/indices/IndicesLifecycleListenerIT.java +++ b/core/src/test/java/org/elasticsearch/indices/IndicesLifecycleListenerIT.java @@ -54,11 +54,7 @@ import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; import static org.elasticsearch.common.settings.Settings.builder; -import static org.elasticsearch.index.shard.IndexShardState.CLOSED; -import static org.elasticsearch.index.shard.IndexShardState.CREATED; -import static org.elasticsearch.index.shard.IndexShardState.POST_RECOVERY; -import static org.elasticsearch.index.shard.IndexShardState.RECOVERING; -import static 
org.elasticsearch.index.shard.IndexShardState.STARTED; +import static org.elasticsearch.index.shard.IndexShardState.*; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; @@ -181,7 +177,7 @@ public void testIndexStateShardChanged() throws Throwable { ensureGreen(); //the 3 relocated shards get closed on the first node - assertShardStatesMatch(stateChangeListenerNode1, 3, CLOSED); + assertShardStatesMatch(stateChangeListenerNode1, 3, RELOCATED, CLOSED); //the 3 relocated shards get created on the second node assertShardStatesMatch(stateChangeListenerNode2, 3, CREATED, RECOVERING, POST_RECOVERY, STARTED); diff --git a/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java b/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java index da6e5ed934c78..5468e095de453 100644 --- a/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java +++ b/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java @@ -164,6 +164,7 @@ public void testRecoverWhileUnderLoadWithReducedAllowedNodes() throws Exception try (BackgroundIndexer indexer = new BackgroundIndexer("test", "type", client(), extraDocs)) { logger.info("--> waiting for {} docs to be indexed ...", waitFor); waitForDocs(waitFor, indexer); + indexer.assertNoFailures(); logger.info("--> {} docs indexed", waitFor); @@ -176,6 +177,7 @@ public void testRecoverWhileUnderLoadWithReducedAllowedNodes() throws Exception logger.info("--> waiting for {} docs to be indexed ...", waitFor); waitForDocs(waitFor, indexer); + indexer.assertNoFailures(); logger.info("--> {} docs indexed", waitFor); @@ -294,17 +296,17 @@ private void iterateAssertCount(final int numberOfShards, final long numberOfDoc //if there was an error we try to wait and see if at some point it'll get fixed logger.info("--> trying to wait"); 
assertTrue(awaitBusy(() -> { - boolean errorOccurred = false; - for (int i = 0; i < iterations; i++) { - SearchResponse searchResponse = client().prepareSearch().setSize(0).setQuery(matchAllQuery()).get(); - if (searchResponse.getHits().totalHits() != numberOfDocs) { - errorOccurred = true; - } - } - return !errorOccurred; - }, - 5, - TimeUnit.MINUTES + boolean errorOccurred = false; + for (int i = 0; i < iterations; i++) { + SearchResponse searchResponse = client().prepareSearch().setSize(0).setQuery(matchAllQuery()).get(); + if (searchResponse.getHits().totalHits() != numberOfDocs) { + errorOccurred = true; + } + } + return !errorOccurred; + }, + 5, + TimeUnit.MINUTES ) ); } From 30ad72130879c7bd254071378a103006c5f1e994 Mon Sep 17 00:00:00 2001 From: Boaz Leskes Date: Tue, 17 Nov 2015 13:51:25 +0100 Subject: [PATCH 07/12] Initial implementation --- .../stats/TransportClusterStatsAction.java | 5 +- .../admin/indices/stats/CommonStatsFlags.java | 1 - .../admin/indices/stats/ShardStats.java | 17 ++- .../stats/TransportIndicesStatsAction.java | 3 +- .../metadata/MetaDataIndexUpgradeService.java | 79 +++++----- .../common/io/stream/StreamInput.java | 25 +-- .../common/io/stream/StreamOutput.java | 12 ++ .../EsRejectedExecutionException.java | 12 +- .../elasticsearch/index/engine/Engine.java | 4 + .../index/engine/InternalEngine.java | 123 ++++++++------- .../index/engine/ShadowEngine.java | 6 + .../index/seqno/LocalCheckpointService.java | 144 ++++++++++++++++++ .../elasticsearch/index/seqno/SeqNoStats.java | 76 +++++++++ .../index/seqno/SequenceNumbersService.java | 29 ++-- .../elasticsearch/index/shard/IndexShard.java | 10 ++ .../index/shard/ShadowIndexShard.java | 10 +- .../elasticsearch/indices/IndicesService.java | 4 +- .../elasticsearch/cluster/DiskUsageTests.java | 4 +- .../index/shard/IndexShardTests.java | 3 +- 19 files changed, 432 insertions(+), 135 deletions(-) create mode 100644 core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java 
create mode 100644 core/src/main/java/org/elasticsearch/index/seqno/SeqNoStats.java diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 3fba14e72bc79..91e1f82658739 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -19,7 +19,6 @@ package org.elasticsearch.action.admin.cluster.stats; -import org.elasticsearch.cluster.health.ClusterHealthStatus; import org.elasticsearch.action.admin.cluster.node.info.NodeInfo; import org.elasticsearch.action.admin.cluster.node.stats.NodeStats; import org.elasticsearch.action.admin.indices.stats.CommonStats; @@ -30,6 +29,7 @@ import org.elasticsearch.action.support.nodes.TransportNodesAction; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterService; +import org.elasticsearch.cluster.health.ClusterHealthStatus; import org.elasticsearch.cluster.health.ClusterStateHealth; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.common.inject.Inject; @@ -105,7 +105,8 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq for (IndexShard indexShard : indexService) { if (indexShard.routingEntry() != null && indexShard.routingEntry().active()) { // only report on fully started shards - shardsStats.add(new ShardStats(indexShard.routingEntry(), indexShard.shardPath(), new CommonStats(indexShard, SHARD_STATS_FLAGS), indexShard.commitStats())); + shardsStats.add(new ShardStats(indexShard.routingEntry(), indexShard.shardPath(), + new CommonStats(indexShard, SHARD_STATS_FLAGS), indexShard.commitStats(), indexShard.seqNoStats())); } } } diff --git 
a/core/src/main/java/org/elasticsearch/action/admin/indices/stats/CommonStatsFlags.java b/core/src/main/java/org/elasticsearch/action/admin/indices/stats/CommonStatsFlags.java index fb306337886af..037bf8575eeea 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/stats/CommonStatsFlags.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/stats/CommonStatsFlags.java @@ -227,7 +227,6 @@ public static enum Flag { RequestCache("request_cache"), Recovery("recovery"); - private final String restName; Flag(String restName) { diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/stats/ShardStats.java b/core/src/main/java/org/elasticsearch/action/admin/indices/stats/ShardStats.java index 8fea8c795ebd5..e76e1a86eb218 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/stats/ShardStats.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/stats/ShardStats.java @@ -28,7 +28,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilderString; import org.elasticsearch.index.engine.CommitStats; -import org.elasticsearch.index.shard.IndexShard; +import org.elasticsearch.index.seqno.SeqNoStats; import org.elasticsearch.index.shard.ShardPath; import java.io.IOException; @@ -42,6 +42,8 @@ public class ShardStats implements Streamable, ToXContent { private CommonStats commonStats; @Nullable private CommitStats commitStats; + @Nullable + private SeqNoStats seqNoStats; private String dataPath; private String statePath; private boolean isCustomDataPath; @@ -49,13 +51,14 @@ public class ShardStats implements Streamable, ToXContent { ShardStats() { } - public ShardStats(ShardRouting routing, ShardPath shardPath, CommonStats commonStats, CommitStats commitStats) { + public ShardStats(ShardRouting routing, ShardPath shardPath, CommonStats commonStats, CommitStats commitStats, SeqNoStats seqNoStats) { this.shardRouting = routing; this.dataPath = 
shardPath.getRootDataPath().toString(); this.statePath = shardPath.getRootStatePath().toString(); this.isCustomDataPath = shardPath.isCustomDataPath(); this.commitStats = commitStats; this.commonStats = commonStats; + this.seqNoStats = seqNoStats; } /** @@ -73,6 +76,11 @@ public CommitStats getCommitStats() { return this.commitStats; } + @Nullable + public SeqNoStats getSeqNoStats() { + return this.seqNoStats; + } + public String getDataPath() { return dataPath; } @@ -99,6 +107,7 @@ public void readFrom(StreamInput in) throws IOException { statePath = in.readString(); dataPath = in.readString(); isCustomDataPath = in.readBoolean(); + seqNoStats = in.readOptionalStreamable(SeqNoStats.PROTOTYPE); } @Override @@ -109,6 +118,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(statePath); out.writeString(dataPath); out.writeBoolean(isCustomDataPath); + out.writeOptionalStreamable(seqNoStats); } @Override @@ -124,6 +134,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (commitStats != null) { commitStats.toXContent(builder, params); } + if (seqNoStats != null) { + seqNoStats.toXContent(builder, params); + } builder.startObject(Fields.SHARD_PATH); builder.field(Fields.STATE_PATH, statePath); builder.field(Fields.DATA_PATH, dataPath); diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/stats/TransportIndicesStatsAction.java b/core/src/main/java/org/elasticsearch/action/admin/indices/stats/TransportIndicesStatsAction.java index d5de67da478bb..e5fd4d4120879 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/stats/TransportIndicesStatsAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/stats/TransportIndicesStatsAction.java @@ -162,6 +162,7 @@ protected ShardStats shardOperation(IndicesStatsRequest request, ShardRouting sh flags.set(CommonStatsFlags.Flag.Recovery); } - return new ShardStats(indexShard.routingEntry(), indexShard.shardPath(), new 
CommonStats(indexShard, flags), indexShard.commitStats()); + return new ShardStats(indexShard.routingEntry(), indexShard.shardPath(), + new CommonStats(indexShard, flags), indexShard.commitStats(), indexShard.seqNoStats()); } } diff --git a/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java b/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java index 00904af89155b..70a492954d120 100644 --- a/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java +++ b/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java @@ -19,7 +19,6 @@ package org.elasticsearch.cluster.metadata; import com.carrotsearch.hppc.cursors.ObjectCursor; - import org.apache.lucene.analysis.Analyzer; import org.elasticsearch.Version; import org.elasticsearch.cluster.routing.UnassignedInfo; @@ -30,6 +29,7 @@ import org.elasticsearch.index.analysis.AnalysisService; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.seqno.LocalCheckpointService; import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.indices.mapper.MapperRegistry; @@ -116,42 +116,43 @@ private static boolean isSupportedVersion(IndexMetaData indexMetaData) { /** All known byte-sized settings for an index. 
*/ public static final Set INDEX_BYTES_SIZE_SETTINGS = unmodifiableSet(newHashSet( - "index.merge.policy.floor_segment", - "index.merge.policy.max_merged_segment", - "index.merge.policy.max_merge_size", - "index.merge.policy.min_merge_size", - "index.shard.recovery.file_chunk_size", - "index.shard.recovery.translog_size", - "index.store.throttle.max_bytes_per_sec", - "index.translog.flush_threshold_size", - "index.translog.fs.buffer_size", - "index.version_map_size")); + "index.merge.policy.floor_segment", + "index.merge.policy.max_merged_segment", + "index.merge.policy.max_merge_size", + "index.merge.policy.min_merge_size", + "index.shard.recovery.file_chunk_size", + "index.shard.recovery.translog_size", + "index.store.throttle.max_bytes_per_sec", + "index.translog.flush_threshold_size", + "index.translog.fs.buffer_size", + "index.version_map_size")); /** All known time settings for an index. */ public static final Set INDEX_TIME_SETTINGS = unmodifiableSet(newHashSet( - "index.gateway.wait_for_mapping_update_post_recovery", - "index.shard.wait_for_mapping_update_post_recovery", - "index.gc_deletes", - "index.indexing.slowlog.threshold.index.debug", - "index.indexing.slowlog.threshold.index.info", - "index.indexing.slowlog.threshold.index.trace", - "index.indexing.slowlog.threshold.index.warn", - "index.refresh_interval", - "index.search.slowlog.threshold.fetch.debug", - "index.search.slowlog.threshold.fetch.info", - "index.search.slowlog.threshold.fetch.trace", - "index.search.slowlog.threshold.fetch.warn", - "index.search.slowlog.threshold.query.debug", - "index.search.slowlog.threshold.query.info", - "index.search.slowlog.threshold.query.trace", - "index.search.slowlog.threshold.query.warn", - "index.shadow.wait_for_initial_commit", - "index.store.stats_refresh_interval", - "index.translog.flush_threshold_period", - "index.translog.interval", - "index.translog.sync_interval", - "index.shard.inactive_time", - 
UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING)); + "index.gateway.wait_for_mapping_update_post_recovery", + "index.shard.wait_for_mapping_update_post_recovery", + "index.gc_deletes", + "index.indexing.slowlog.threshold.index.debug", + "index.indexing.slowlog.threshold.index.info", + "index.indexing.slowlog.threshold.index.trace", + "index.indexing.slowlog.threshold.index.warn", + "index.refresh_interval", + "index.search.slowlog.threshold.fetch.debug", + "index.search.slowlog.threshold.fetch.info", + "index.search.slowlog.threshold.fetch.trace", + "index.search.slowlog.threshold.fetch.warn", + "index.search.slowlog.threshold.query.debug", + "index.search.slowlog.threshold.query.info", + "index.search.slowlog.threshold.query.trace", + "index.search.slowlog.threshold.query.warn", + "index.shadow.wait_for_initial_commit", + "index.store.stats_refresh_interval", + "index.translog.flush_threshold_period", + "index.translog.interval", + "index.translog.sync_interval", + "index.shard.inactive_time", + LocalCheckpointService.SETTINGS_INDEX_LAG_MAX_WAIT, + UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING)); /** * Elasticsearch 2.0 requires units on byte/memory and time settings; this method adds the default unit to any such settings that are @@ -163,7 +164,7 @@ private IndexMetaData addDefaultUnitsIfNeeded(IndexMetaData indexMetaData) { // Created lazily if we find any settings that are missing units: Settings settings = indexMetaData.getSettings(); Settings.Builder newSettings = null; - for(String byteSizeSetting : INDEX_BYTES_SIZE_SETTINGS) { + for (String byteSizeSetting : INDEX_BYTES_SIZE_SETTINGS) { String value = settings.get(byteSizeSetting); if (value != null) { try { @@ -180,7 +181,7 @@ private IndexMetaData addDefaultUnitsIfNeeded(IndexMetaData indexMetaData) { newSettings.put(byteSizeSetting, value + "b"); } } - for(String timeSetting : INDEX_TIME_SETTINGS) { + for (String timeSetting : INDEX_TIME_SETTINGS) { String value = 
settings.get(timeSetting); if (value != null) { try { @@ -200,9 +201,9 @@ private IndexMetaData addDefaultUnitsIfNeeded(IndexMetaData indexMetaData) { if (newSettings != null) { // At least one setting was changed: return IndexMetaData.builder(indexMetaData) - .version(indexMetaData.getVersion()) - .settings(newSettings.build()) - .build(); + .version(indexMetaData.getVersion()) + .settings(newSettings.build()) + .build(); } } diff --git a/core/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java b/core/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java index 20859e2716a8d..22c71b3df8f09 100644 --- a/core/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java +++ b/core/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java @@ -40,19 +40,9 @@ import org.joda.time.DateTime; import org.joda.time.DateTimeZone; -import java.io.ByteArrayInputStream; -import java.io.EOFException; -import java.io.FileNotFoundException; -import java.io.FilterInputStream; -import java.io.IOException; -import java.io.InputStream; +import java.io.*; import java.nio.file.NoSuchFileException; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.function.Supplier; import static org.elasticsearch.ElasticsearchException.readException; @@ -543,6 +533,17 @@ public T readOptionalStreamable(Supplier supplier) thr } } + /** + * Serializes a potential null value. 
+ */ + public > T readOptionalStreamable(StreamableReader streamableReader) throws IOException { + if (readBoolean()) { + return streamableReader.readFrom(this); + } else { + return null; + } + } + public T readThrowable() throws IOException { if (readBoolean()) { int key = readVInt(); diff --git a/core/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java b/core/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java index 5f1e7623d2822..0285e56355792 100644 --- a/core/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java +++ b/core/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java @@ -503,6 +503,18 @@ public void writeOptionalStreamable(@Nullable Streamable streamable) throws IOEx } } + /** + * Serializes a potential null value. + */ + public void writeOptionalStreamable(@Nullable Writeable writeable) throws IOException { + if (writeable != null) { + writeBoolean(true); + writeable.writeTo(this); + } else { + writeBoolean(false); + } + } + public void writeThrowable(Throwable throwable) throws IOException { if (throwable == null) { writeBoolean(false); diff --git a/core/src/main/java/org/elasticsearch/common/util/concurrent/EsRejectedExecutionException.java b/core/src/main/java/org/elasticsearch/common/util/concurrent/EsRejectedExecutionException.java index d75b3ffa8c264..8033750d1d24e 100644 --- a/core/src/main/java/org/elasticsearch/common/util/concurrent/EsRejectedExecutionException.java +++ b/core/src/main/java/org/elasticsearch/common/util/concurrent/EsRejectedExecutionException.java @@ -31,13 +31,17 @@ public class EsRejectedExecutionException extends ElasticsearchException { private final boolean isExecutorShutdown; - public EsRejectedExecutionException(String message, boolean isExecutorShutdown) { - super(message); + public EsRejectedExecutionException(String message, boolean isExecutorShutdown, Object... 
args) { + super(message, args); this.isExecutorShutdown = isExecutorShutdown; } - public EsRejectedExecutionException(String message) { - this(message, false); + public EsRejectedExecutionException(String message, Object... args) { + this(message, false, args); + } + + public EsRejectedExecutionException(String message, boolean isExecutorShutdown) { + this(message, isExecutorShutdown, new Object[0]); } public EsRejectedExecutionException() { diff --git a/core/src/main/java/org/elasticsearch/index/engine/Engine.java b/core/src/main/java/org/elasticsearch/index/engine/Engine.java index bccae2e46642a..2bd714c2d26b3 100644 --- a/core/src/main/java/org/elasticsearch/index/engine/Engine.java +++ b/core/src/main/java/org/elasticsearch/index/engine/Engine.java @@ -45,6 +45,7 @@ import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.index.merge.MergeStats; +import org.elasticsearch.index.seqno.SeqNoStats; import org.elasticsearch.index.seqno.SequenceNumbersService; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.store.Store; @@ -325,6 +326,9 @@ public CommitStats commitStats() { return new CommitStats(getLastCommittedSegmentInfos()); } + /** get sequence number related stats */ + public abstract SeqNoStats seqNoStats(); + /** * Read the last segments info from the commit pointed to by the searcher manager */ diff --git a/core/src/main/java/org/elasticsearch/index/engine/InternalEngine.java b/core/src/main/java/org/elasticsearch/index/engine/InternalEngine.java index dbb62a735a064..81b9ab44851a5 100644 --- a/core/src/main/java/org/elasticsearch/index/engine/InternalEngine.java +++ b/core/src/main/java/org/elasticsearch/index/engine/InternalEngine.java @@ -50,6 +50,7 @@ import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.index.merge.MergeStats; import org.elasticsearch.index.merge.OnGoingMerge; +import org.elasticsearch.index.seqno.SeqNoStats; import 
org.elasticsearch.index.seqno.SequenceNumbersService; import org.elasticsearch.index.shard.ElasticsearchMergePolicy; import org.elasticsearch.index.shard.MergeSchedulerConfig; @@ -348,10 +349,6 @@ public boolean index(Index index) { } catch (OutOfMemoryError | IllegalStateException | IOException t) { maybeFailEngine("index", t); throw new IndexFailedEngineException(shardId, index.type(), index.id(), t); - } finally { - if (index.seqNo() != SequenceNumbersService.UNASSIGNED_SEQ_NO) { - seqNoService.markSeqNoAsCompleted(index.seqNo()); - } } checkVersionMapRefresh(); return created; @@ -359,7 +356,7 @@ public boolean index(Index index) { private boolean innerIndex(Index index) throws IOException { synchronized (dirtyLock(index.uid())) { - lastWriteNanos = index.startTime(); + lastWriteNanos = index.startTime(); final long currentVersion; final boolean deleted; VersionValue versionValue = versionMap.getUnderLock(index.uid().bytes()); @@ -388,37 +385,47 @@ private boolean innerIndex(Index index) throws IOException { final boolean created; index.updateVersion(updatedVersion); + final long seqNo; if (index.origin() == Operation.Origin.PRIMARY) { - index.updateSeqNo(seqNoService.generateSeqNo()); - } - - if (currentVersion == Versions.NOT_FOUND) { - // document does not exists, we can optimize for create - created = true; - if (index.docs().size() > 1) { - indexWriter.addDocuments(index.docs()); - } else { - indexWriter.addDocument(index.docs().get(0)); - } + seqNo = seqNoService.generateSeqNo(); } else { - if (versionValue != null) { - created = versionValue.delete(); // we have a delete which is not GC'ed... 
- } else { - created = false; + seqNo = index.seqNo(); + seqNoService.markSeqNoAsStarted(seqNo); + } + try { + if (index.origin() == Operation.Origin.PRIMARY) { + index.updateSeqNo(seqNo); } - if (index.docs().size() > 1) { - indexWriter.updateDocuments(index.uid(), index.docs()); + if (currentVersion == Versions.NOT_FOUND) { + // document does not exists, we can optimize for create + created = true; + if (index.docs().size() > 1) { + indexWriter.addDocuments(index.docs()); + } else { + indexWriter.addDocument(index.docs().get(0)); + } } else { - indexWriter.updateDocument(index.uid(), index.docs().get(0)); + if (versionValue != null) { + created = versionValue.delete(); // we have a delete which is not GC'ed... + } else { + created = false; + } + if (index.docs().size() > 1) { + indexWriter.updateDocuments(index.uid(), index.docs()); + } else { + indexWriter.updateDocument(index.uid(), index.docs().get(0)); + } } - } - Translog.Location translogLocation = translog.add(new Translog.Index(index)); + Translog.Location translogLocation = translog.add(new Translog.Index(index)); - versionMap.putUnderLock(index.uid().bytes(), new VersionValue(updatedVersion, translogLocation)); - index.setTranslogLocation(translogLocation); + versionMap.putUnderLock(index.uid().bytes(), new VersionValue(updatedVersion, translogLocation)); + index.setTranslogLocation(translogLocation); - indexingService.postIndexUnderLock(index); - return created; + indexingService.postIndexUnderLock(index); + return created; + } finally { + seqNoService.markSeqNoAsCompleted(seqNo); + } } } @@ -458,10 +465,6 @@ public void delete(Delete delete) throws EngineException { } catch (OutOfMemoryError | IllegalStateException | IOException t) { maybeFailEngine("delete", t); throw new DeleteFailedEngineException(shardId, delete, t); - } finally { - if (delete.seqNo() != SequenceNumbersService.UNASSIGNED_SEQ_NO) { - seqNoService.markSeqNoAsCompleted(delete.seqNo()); - } } maybePruneDeletedTombstones(); @@ -506,28 
+509,39 @@ private void innerDelete(Delete delete) throws IOException { } updatedVersion = delete.versionType().updateVersion(currentVersion, expectedVersion); + final long seqNo; if (delete.origin() == Operation.Origin.PRIMARY) { - delete.updateSeqNo(seqNoService.generateSeqNo()); - } - - final boolean found; - if (currentVersion == Versions.NOT_FOUND) { - // doc does not exist and no prior deletes - found = false; - } else if (versionValue != null && versionValue.delete()) { - // a "delete on delete", in this case, we still increment the version, log it, and return that version - found = false; + seqNo = seqNoService.generateSeqNo(); } else { - // we deleted a currently existing document - indexWriter.deleteDocuments(delete.uid()); - found = true; + seqNo = delete.seqNo(); + seqNoService.markSeqNoAsStarted(seqNo); } + try { + if (delete.origin() == Operation.Origin.PRIMARY) { + delete.updateSeqNo(seqNo); + } - delete.updateVersion(updatedVersion, found); - Translog.Location translogLocation = translog.add(new Translog.Delete(delete)); - versionMap.putUnderLock(delete.uid().bytes(), new DeleteVersionValue(updatedVersion, engineConfig.getThreadPool().estimatedTimeInMillis(), translogLocation)); - delete.setTranslogLocation(translogLocation); - indexingService.postDeleteUnderLock(delete); + final boolean found; + if (currentVersion == Versions.NOT_FOUND) { + // doc does not exist and no prior deletes + found = false; + } else if (versionValue != null && versionValue.delete()) { + // a "delete on delete", in this case, we still increment the version, log it, and return that version + found = false; + } else { + // we deleted a currently existing document + indexWriter.deleteDocuments(delete.uid()); + found = true; + } + + delete.updateVersion(updatedVersion, found); + Translog.Location translogLocation = translog.add(new Translog.Delete(delete)); + versionMap.putUnderLock(delete.uid().bytes(), new DeleteVersionValue(updatedVersion, 
engineConfig.getThreadPool().estimatedTimeInMillis(), translogLocation)); + delete.setTranslogLocation(translogLocation); + indexingService.postDeleteUnderLock(delete); + } finally { + seqNoService.markSeqNoAsCompleted(seqNo); + } } } @@ -988,7 +1002,7 @@ final static class SearchFactory extends EngineSearcherFactory { @Override public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) throws IOException { IndexSearcher searcher = super.newSearcher(reader, previousReader); - if (reader instanceof LeafReader && isMergedSegment((LeafReader)reader)) { + if (reader instanceof LeafReader && isMergedSegment((LeafReader) reader)) { // we call newSearcher from the IndexReaderWarmer which warms segments during merging // in that case the reader is a LeafReader and all we need to do is to build a new Searcher // and return it since it does it's own warming for that particular reader. @@ -1178,4 +1192,9 @@ public void onSettingsChanged() { public MergeStats getMergeStats() { return mergeScheduler.stats(); } + + @Override + public SeqNoStats seqNoStats() { + return seqNoService.stats(); + } } diff --git a/core/src/main/java/org/elasticsearch/index/engine/ShadowEngine.java b/core/src/main/java/org/elasticsearch/index/engine/ShadowEngine.java index af3e0ae82a8ec..dd9ff4375e854 100644 --- a/core/src/main/java/org/elasticsearch/index/engine/ShadowEngine.java +++ b/core/src/main/java/org/elasticsearch/index/engine/ShadowEngine.java @@ -30,6 +30,7 @@ import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.ReleasableLock; +import org.elasticsearch.index.seqno.SeqNoStats; import org.elasticsearch.index.translog.Translog; import java.io.IOException; @@ -231,4 +232,9 @@ public long indexWriterRAMBytesUsed() { // No IndexWriter throw new UnsupportedOperationException("ShadowEngine has no IndexWriter"); } + + @Override + public SeqNoStats seqNoStats() { 
+ throw new UnsupportedOperationException("ShadowEngine doesn't track sequence numbers"); + } } diff --git a/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java b/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java new file mode 100644 index 0000000000000..bad0b0e5bd88d --- /dev/null +++ b/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java @@ -0,0 +1,144 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.index.seqno; + +import org.apache.lucene.util.FixedBitSet; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.shard.AbstractIndexShardComponent; +import org.elasticsearch.index.shard.ShardId; + +public class LocalCheckpointService extends AbstractIndexShardComponent { + + public static String SETTINGS_INDEX_LAG_THRESHOLD = "index.seq_no.index_lag.threshold"; + public static String SETTINGS_INDEX_LAG_MAX_WAIT = "index.seq_no.index_lag.max_wait"; + + final Object mutex = new Object(); + final FixedBitSet processedSeqNo; + final int indexLagThreshold; + final TimeValue indexLagMaxWait; + + + volatile long nextSeqNo = 0; + volatile long checkpoint = -1; + + public LocalCheckpointService(ShardId shardId, IndexSettings indexSettings) { + super(shardId, indexSettings); + indexLagThreshold = indexSettings.getSettings().getAsInt(SETTINGS_INDEX_LAG_THRESHOLD, 1024); + indexLagMaxWait = indexSettings.getSettings().getAsTime(SETTINGS_INDEX_LAG_MAX_WAIT, TimeValue.timeValueSeconds(30)); + processedSeqNo = new FixedBitSet(indexLagThreshold); + + } + + public long generateSeqNo() { + synchronized (mutex) { + // we have to keep checking when ensure capacity returns because it release the lock and nextSeqNo may change + while (hasCapacity(nextSeqNo) == false) { + ensureCapacity(nextSeqNo); + } + return nextSeqNo++; + } + } + + public void markSeqNoAsStarted(long seqNo) { + synchronized (mutex) { + // make sure we track highest seen seqNo + if (seqNo >= nextSeqNo) { + nextSeqNo = seqNo + 1; + } + if (seqNo <= checkpoint) { + // this is possible during recover where we might replay an op that was also replicated + return; + } + ensureCapacity(seqNo); + assert processedSeqNo.get(seqNoToOffset(seqNo)) == false : "expected [" + seqNo + "] not to be 
marked as started"; + } + } + + public long markSeqNoAsCompleted(long seqNo) { + synchronized (mutex) { + if (seqNo <= checkpoint) { + // this is possible during recover where we might replay an op that was also replicated + return checkpoint; + } + // just to be safe (previous calls to generateSeqNo/markSeqNoAsStarted should ensure this is OK) + ensureCapacity(seqNo); + int offset = seqNoToOffset(seqNo); + processedSeqNo.set(offset); + if (seqNo == checkpoint + 1) { + do { + // clear the flag as we are making it free for future operations. do se before we expose it + // by moving the checkpoint + processedSeqNo.clear(offset); + checkpoint++; + offset = seqNoToOffset(checkpoint + 1); + } while (processedSeqNo.get(offset)); + mutex.notifyAll(); + } + } + return checkpoint; + } + + public long getCheckpoint() { + return checkpoint; + } + + public long getMaxSeqNo() { + return nextSeqNo - 1; + } + + + private boolean hasCapacity(long seqNo) { + assert Thread.holdsLock(mutex); + return (seqNo - checkpoint) < indexLagThreshold; + } + + private void ensureCapacity(long seqNo) { + assert Thread.holdsLock(mutex); + long retry = 0; + final long maxRetries = indexLagMaxWait.seconds(); + while (hasCapacity(seqNo) == false) { + try { + if (retry > maxRetries) { + ElasticsearchException e = new EsRejectedExecutionException("indexing lag exceeds [{}] (seq# requested [{}], local checkpoint [{}]", + indexLagThreshold, seqNo, checkpoint); + e.setShard(shardId()); + throw e; + } + + // this temporary releases the lock on mutex + mutex.wait(Math.min(1000, indexLagMaxWait.millis() - retry * 1000)); + retry++; + } catch (InterruptedException ie) { + ElasticsearchException exp = new ElasticsearchException("interrupted while waiting on index lag"); + exp.setShard(shardId()); + throw exp; + } + } + } + + private int seqNoToOffset(long seqNo) { + assert seqNo - checkpoint < indexLagThreshold; + assert seqNo > checkpoint; + return (int) (seqNo % indexLagThreshold); + } + +} diff --git 
a/core/src/main/java/org/elasticsearch/index/seqno/SeqNoStats.java b/core/src/main/java/org/elasticsearch/index/seqno/SeqNoStats.java new file mode 100644 index 0000000000000..a2052f75976a0 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/index/seqno/SeqNoStats.java @@ -0,0 +1,76 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.index.seqno; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentBuilderString; + +import java.io.IOException; + +public class SeqNoStats implements ToXContent, Writeable { + + public static final SeqNoStats PROTOTYPE = new SeqNoStats(0, 0); + + final long maxSeqNo; + final long localCheckpoint; + + public SeqNoStats(long maxSeqNo, long localCheckpoint) { + this.maxSeqNo = maxSeqNo; + this.localCheckpoint = localCheckpoint; + } + + public long getMaxSeqNo() { + return maxSeqNo; + } + + public long getLocalCheckpoint() { + return localCheckpoint; + } + + @Override + public SeqNoStats readFrom(StreamInput in) throws IOException { + return new SeqNoStats(in.readLong(), in.readLong()); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeLong(maxSeqNo); + out.writeLong(localCheckpoint); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(Fields.SEQ_NO); + builder.field(Fields.MAX_SEQ_NO, maxSeqNo); + builder.field(Fields.LOCAL_CHECKPOINT, localCheckpoint); + builder.endObject(); + return builder; + } + + + static final class Fields { + static final XContentBuilderString SEQ_NO = new XContentBuilderString("seq_no"); + static final XContentBuilderString MAX_SEQ_NO = new XContentBuilderString("max"); + static final XContentBuilderString LOCAL_CHECKPOINT = new XContentBuilderString("local_checkpoint"); + } +} diff --git a/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java b/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java index 46b033622432b..12a4e8bad5d50 100644 --- 
a/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java +++ b/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java @@ -22,39 +22,36 @@ import org.elasticsearch.index.shard.AbstractIndexShardComponent; import org.elasticsearch.index.shard.ShardId; -import java.util.concurrent.atomic.AtomicLong; - /** a very light weight implementation. will be replaced with proper machinery later */ public class SequenceNumbersService extends AbstractIndexShardComponent { public final static long UNASSIGNED_SEQ_NO = -1L; - - AtomicLong seqNoGenerator = new AtomicLong(); + final LocalCheckpointService localCheckpointService; public SequenceNumbersService(ShardId shardId, IndexSettings indexSettings) { super(shardId, indexSettings); + localCheckpointService = new LocalCheckpointService(shardId, indexSettings); } /** * generates a new sequence number. * Note: you must call {@link #markSeqNoAsCompleted(long)} after the operation for which this seq# was generated - * was completed (whether successfully or with a failure + * was completed (whether successfully or with a failure) */ public long generateSeqNo() { - return seqNoGenerator.getAndIncrement(); + return localCheckpointService.generateSeqNo(); + } + + public void markSeqNoAsStarted(long seqNo) { + localCheckpointService.markSeqNoAsStarted(seqNo); + } public void markSeqNoAsCompleted(long seqNo) { - // this is temporary to make things semi sane on primary promotion and recovery. 
will be replaced with better machinery - boolean success; - do { - long maxSeqNo = seqNoGenerator.get(); - if (seqNo > maxSeqNo) { - success = seqNoGenerator.compareAndSet(maxSeqNo, seqNo); - } else { - success = true; - } - } while (success == false); + localCheckpointService.markSeqNoAsCompleted(seqNo); } + public SeqNoStats stats() { + return new SeqNoStats(localCheckpointService.getMaxSeqNo(), localCheckpointService.getCheckpoint()); + } } diff --git a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java index 443696986fd11..5bc38e5616e64 100644 --- a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -79,6 +79,7 @@ import org.elasticsearch.index.refresh.RefreshStats; import org.elasticsearch.index.search.stats.SearchStats; import org.elasticsearch.index.search.stats.ShardSearchStats; +import org.elasticsearch.index.seqno.SeqNoStats; import org.elasticsearch.index.seqno.SequenceNumbersService; import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.index.snapshots.IndexShardRepository; @@ -591,6 +592,15 @@ public CommitStats commitStats() { return engine == null ? null : engine.commitStats(); } + /** + * @return {@link SeqNoStats} if engine is open, otherwise null + */ + @Nullable + public SeqNoStats seqNoStats() { + Engine engine = getEngineOrNull(); + return engine == null ? null : engine.seqNoStats(); + } + public IndexingStats indexingStats(String... 
types) { return indexingService.stats(types); } diff --git a/core/src/main/java/org/elasticsearch/index/shard/ShadowIndexShard.java b/core/src/main/java/org/elasticsearch/index/shard/ShadowIndexShard.java index 50a16fa1cee70..d99e2ccd0e803 100644 --- a/core/src/main/java/org/elasticsearch/index/shard/ShadowIndexShard.java +++ b/core/src/main/java/org/elasticsearch/index/shard/ShadowIndexShard.java @@ -18,8 +18,6 @@ */ package org.elasticsearch.index.shard; -import java.io.IOException; - import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.common.Nullable; import org.elasticsearch.index.IndexSettings; @@ -31,10 +29,13 @@ import org.elasticsearch.index.fielddata.IndexFieldDataService; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.merge.MergeStats; +import org.elasticsearch.index.seqno.SeqNoStats; import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.index.store.Store; import org.elasticsearch.index.translog.TranslogStats; +import java.io.IOException; + /** * ShadowIndexShard extends {@link IndexShard} to add file synchronization * from the primary when a flush happens. 
It also ensures that a replica being @@ -67,6 +68,11 @@ public MergeStats mergeStats() { return new MergeStats(); } + @Override + public SeqNoStats seqNoStats() { + return null; + } + @Override public boolean canIndex() { return false; diff --git a/core/src/main/java/org/elasticsearch/indices/IndicesService.java b/core/src/main/java/org/elasticsearch/indices/IndicesService.java index dead72aee8b4f..352ca12740516 100644 --- a/core/src/main/java/org/elasticsearch/indices/IndicesService.java +++ b/core/src/main/java/org/elasticsearch/indices/IndicesService.java @@ -198,7 +198,9 @@ public NodeIndicesStats stats(boolean includePrevious, CommonStatsFlags flags) { if (indexShard.routingEntry() == null) { continue; } - IndexShardStats indexShardStats = new IndexShardStats(indexShard.shardId(), new ShardStats[] { new ShardStats(indexShard.routingEntry(), indexShard.shardPath(), new CommonStats(indexShard, flags), indexShard.commitStats()) }); + IndexShardStats indexShardStats = new IndexShardStats(indexShard.shardId(), + new ShardStats[]{new ShardStats(indexShard.routingEntry(), indexShard.shardPath(), + new CommonStats(indexShard, flags), indexShard.commitStats(), indexShard.seqNoStats())}); if (!statsByShard.containsKey(indexService.index())) { statsByShard.put(indexService.index(), arrayAsArrayList(indexShardStats)); } else { diff --git a/core/src/test/java/org/elasticsearch/cluster/DiskUsageTests.java b/core/src/test/java/org/elasticsearch/cluster/DiskUsageTests.java index 1be999d8fedce..9ff9ee9ee437c 100644 --- a/core/src/test/java/org/elasticsearch/cluster/DiskUsageTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/DiskUsageTests.java @@ -104,8 +104,8 @@ public void testFillShardLevelInfo() { CommonStats commonStats1 = new CommonStats(); commonStats1.store = new StoreStats(1000, 1); ShardStats[] stats = new ShardStats[]{ - new ShardStats(test_0, new ShardPath(false, test0Path, test0Path, "0xdeadbeef", test_0.shardId()), commonStats0, null), - new 
ShardStats(test_1, new ShardPath(false, test1Path, test1Path, "0xdeadbeef", test_1.shardId()), commonStats1, null) + new ShardStats(test_0, new ShardPath(false, test0Path, test0Path, "0xdeadbeef", test_0.shardId()), commonStats0, null, null), + new ShardStats(test_1, new ShardPath(false, test1Path, test1Path, "0xdeadbeef", test_1.shardId()), commonStats1, null, null) }; ImmutableOpenMap.Builder shardSizes = ImmutableOpenMap.builder(); ImmutableOpenMap.Builder routingToPath = ImmutableOpenMap.builder(); diff --git a/core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java index 1923c1e2cffa4..0fd9939e613a3 100644 --- a/core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java +++ b/core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java @@ -591,7 +591,8 @@ public void testShardStats() throws IOException { IndicesService indicesService = getInstanceFromNode(IndicesService.class); IndexService test = indicesService.indexService("test"); IndexShard shard = test.getShardOrNull(0); - ShardStats stats = new ShardStats(shard.routingEntry(), shard.shardPath(), new CommonStats(shard, new CommonStatsFlags()), shard.commitStats()); + ShardStats stats = new ShardStats(shard.routingEntry(), shard.shardPath(), new CommonStats(shard, new CommonStatsFlags()), + shard.commitStats(), shard.seqNoStats()); assertEquals(shard.shardPath().getRootDataPath().toString(), stats.getDataPath()); assertEquals(shard.shardPath().getRootStatePath().toString(), stats.getStatePath()); assertEquals(shard.shardPath().isCustomDataPath(), stats.isCustomDataPath()); From 9aea462fe8d67baf96e4dbb16e02ff9f466b115c Mon Sep 17 00:00:00 2001 From: Boaz Leskes Date: Sun, 29 Nov 2015 22:58:55 +0100 Subject: [PATCH 08/12] Add tests plus other changes --- .../admin/indices/stats/ShardStats.java | 2 +- .../common/io/stream/StreamInput.java | 2 +- .../index/engine/InternalEngine.java | 124 ++++----- 
.../index/seqno/LocalCheckpointService.java | 23 +- .../index/seqno/SequenceNumbersService.java | 5 - .../index/engine/InternalEngineTests.java | 47 ++++ .../seqno/LocalCheckpointServiceTests.java | 257 ++++++++++++++++++ .../org/elasticsearch/test/ESTestCase.java | 22 +- 8 files changed, 384 insertions(+), 98 deletions(-) create mode 100644 core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointServiceTests.java diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/stats/ShardStats.java b/core/src/main/java/org/elasticsearch/action/admin/indices/stats/ShardStats.java index e76e1a86eb218..3b1ff2e538de8 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/stats/ShardStats.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/stats/ShardStats.java @@ -107,7 +107,7 @@ public void readFrom(StreamInput in) throws IOException { statePath = in.readString(); dataPath = in.readString(); isCustomDataPath = in.readBoolean(); - seqNoStats = in.readOptionalStreamable(SeqNoStats.PROTOTYPE); + seqNoStats = in.readOptionalStreamableReader(SeqNoStats.PROTOTYPE); } @Override diff --git a/core/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java b/core/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java index 22c71b3df8f09..23d414c851909 100644 --- a/core/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java +++ b/core/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java @@ -536,7 +536,7 @@ public T readOptionalStreamable(Supplier supplier) thr /** * Serializes a potential null value. 
*/ - public > T readOptionalStreamable(StreamableReader streamableReader) throws IOException { + public > T readOptionalStreamableReader(StreamableReader streamableReader) throws IOException { if (readBoolean()) { return streamableReader.readFrom(this); } else { diff --git a/core/src/main/java/org/elasticsearch/index/engine/InternalEngine.java b/core/src/main/java/org/elasticsearch/index/engine/InternalEngine.java index 81b9ab44851a5..488222487630f 100644 --- a/core/src/main/java/org/elasticsearch/index/engine/InternalEngine.java +++ b/core/src/main/java/org/elasticsearch/index/engine/InternalEngine.java @@ -356,45 +356,38 @@ public boolean index(Index index) { private boolean innerIndex(Index index) throws IOException { synchronized (dirtyLock(index.uid())) { - lastWriteNanos = index.startTime(); - final long currentVersion; - final boolean deleted; - VersionValue versionValue = versionMap.getUnderLock(index.uid().bytes()); - if (versionValue == null) { - currentVersion = loadCurrentVersionFromIndex(index.uid()); - deleted = currentVersion == Versions.NOT_FOUND; - } else { - deleted = versionValue.delete(); - if (engineConfig.isEnableGcDeletes() && versionValue.delete() && (engineConfig.getThreadPool().estimatedTimeInMillis() - versionValue.time()) > engineConfig.getGcDeletesInMillis()) { - currentVersion = Versions.NOT_FOUND; // deleted, and GC + try { + lastWriteNanos = index.startTime(); + final long currentVersion; + final boolean deleted; + VersionValue versionValue = versionMap.getUnderLock(index.uid().bytes()); + if (versionValue == null) { + currentVersion = loadCurrentVersionFromIndex(index.uid()); + deleted = currentVersion == Versions.NOT_FOUND; } else { - currentVersion = versionValue.version(); + deleted = versionValue.delete(); + if (engineConfig.isEnableGcDeletes() && versionValue.delete() && (engineConfig.getThreadPool().estimatedTimeInMillis() - versionValue.time()) > engineConfig.getGcDeletesInMillis()) { + currentVersion = Versions.NOT_FOUND; // 
deleted, and GC + } else { + currentVersion = versionValue.version(); + } } - } - long expectedVersion = index.version(); - if (index.versionType().isVersionConflictForWrites(currentVersion, expectedVersion, deleted)) { - if (index.origin() == Operation.Origin.RECOVERY) { - return false; - } else { - throw new VersionConflictEngineException(shardId, index.type(), index.id(), - index.versionType().explainConflictForWrites(currentVersion, expectedVersion, deleted)); + long expectedVersion = index.version(); + if (index.versionType().isVersionConflictForWrites(currentVersion, expectedVersion, deleted)) { + if (index.origin() == Operation.Origin.RECOVERY) { + return false; + } else { + throw new VersionConflictEngineException(shardId, index.type(), index.id(), + index.versionType().explainConflictForWrites(currentVersion, expectedVersion, deleted)); + } } - } - long updatedVersion = index.versionType().updateVersion(currentVersion, expectedVersion); + long updatedVersion = index.versionType().updateVersion(currentVersion, expectedVersion); - final boolean created; - index.updateVersion(updatedVersion); - final long seqNo; - if (index.origin() == Operation.Origin.PRIMARY) { - seqNo = seqNoService.generateSeqNo(); - } else { - seqNo = index.seqNo(); - seqNoService.markSeqNoAsStarted(seqNo); - } - try { + final boolean created; + index.updateVersion(updatedVersion); if (index.origin() == Operation.Origin.PRIMARY) { - index.updateSeqNo(seqNo); + index.updateSeqNo(seqNoService.generateSeqNo()); } if (currentVersion == Versions.NOT_FOUND) { // document does not exists, we can optimize for create @@ -424,7 +417,9 @@ private boolean innerIndex(Index index) throws IOException { indexingService.postIndexUnderLock(index); return created; } finally { - seqNoService.markSeqNoAsCompleted(seqNo); + if (index.seqNo() != SequenceNumbersService.UNASSIGNED_SEQ_NO) { + seqNoService.markSeqNoAsCompleted(index.seqNo()); + } } } } @@ -481,44 +476,37 @@ private void 
maybePruneDeletedTombstones() { private void innerDelete(Delete delete) throws IOException { synchronized (dirtyLock(delete.uid())) { - lastWriteNanos = delete.startTime(); - final long currentVersion; - final boolean deleted; - VersionValue versionValue = versionMap.getUnderLock(delete.uid().bytes()); - if (versionValue == null) { - currentVersion = loadCurrentVersionFromIndex(delete.uid()); - deleted = currentVersion == Versions.NOT_FOUND; - } else { - deleted = versionValue.delete(); - if (engineConfig.isEnableGcDeletes() && versionValue.delete() && (engineConfig.getThreadPool().estimatedTimeInMillis() - versionValue.time()) > engineConfig.getGcDeletesInMillis()) { - currentVersion = Versions.NOT_FOUND; // deleted, and GC + try { + lastWriteNanos = delete.startTime(); + final long currentVersion; + final boolean deleted; + VersionValue versionValue = versionMap.getUnderLock(delete.uid().bytes()); + if (versionValue == null) { + currentVersion = loadCurrentVersionFromIndex(delete.uid()); + deleted = currentVersion == Versions.NOT_FOUND; } else { - currentVersion = versionValue.version(); + deleted = versionValue.delete(); + if (engineConfig.isEnableGcDeletes() && versionValue.delete() && (engineConfig.getThreadPool().estimatedTimeInMillis() - versionValue.time()) > engineConfig.getGcDeletesInMillis()) { + currentVersion = Versions.NOT_FOUND; // deleted, and GC + } else { + currentVersion = versionValue.version(); + } } - } - long updatedVersion; - long expectedVersion = delete.version(); - if (delete.versionType().isVersionConflictForWrites(currentVersion, expectedVersion, deleted)) { - if (delete.origin() == Operation.Origin.RECOVERY) { - return; - } else { - throw new VersionConflictEngineException(shardId, delete.type(), delete.id(), - delete.versionType().explainConflictForWrites(currentVersion, expectedVersion, deleted)); + long updatedVersion; + long expectedVersion = delete.version(); + if (delete.versionType().isVersionConflictForWrites(currentVersion, 
expectedVersion, deleted)) { + if (delete.origin() == Operation.Origin.RECOVERY) { + return; + } else { + throw new VersionConflictEngineException(shardId, delete.type(), delete.id(), + delete.versionType().explainConflictForWrites(currentVersion, expectedVersion, deleted)); + } } - } - updatedVersion = delete.versionType().updateVersion(currentVersion, expectedVersion); + updatedVersion = delete.versionType().updateVersion(currentVersion, expectedVersion); - final long seqNo; - if (delete.origin() == Operation.Origin.PRIMARY) { - seqNo = seqNoService.generateSeqNo(); - } else { - seqNo = delete.seqNo(); - seqNoService.markSeqNoAsStarted(seqNo); - } - try { if (delete.origin() == Operation.Origin.PRIMARY) { - delete.updateSeqNo(seqNo); + delete.updateSeqNo(seqNoService.generateSeqNo()); } final boolean found; @@ -540,7 +528,9 @@ private void innerDelete(Delete delete) throws IOException { delete.setTranslogLocation(translogLocation); indexingService.postDeleteUnderLock(delete); } finally { - seqNoService.markSeqNoAsCompleted(seqNo); + if (delete.seqNo() != SequenceNumbersService.UNASSIGNED_SEQ_NO) { + seqNoService.markSeqNoAsCompleted(delete.seqNo()); + } } } } diff --git a/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java b/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java index bad0b0e5bd88d..49cdb7d921fdd 100644 --- a/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java +++ b/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java @@ -30,6 +30,8 @@ public class LocalCheckpointService extends AbstractIndexShardComponent { public static String SETTINGS_INDEX_LAG_THRESHOLD = "index.seq_no.index_lag.threshold"; public static String SETTINGS_INDEX_LAG_MAX_WAIT = "index.seq_no.index_lag.max_wait"; + final static int DEFAULT_INDEX_LAG_THRESHOLD = 1024; + final static TimeValue DEFAULT_INDEX_LAG_MAX_WAIT = TimeValue.timeValueSeconds(30); final Object mutex = new Object(); 
final FixedBitSet processedSeqNo; @@ -42,8 +44,8 @@ public class LocalCheckpointService extends AbstractIndexShardComponent { public LocalCheckpointService(ShardId shardId, IndexSettings indexSettings) { super(shardId, indexSettings); - indexLagThreshold = indexSettings.getSettings().getAsInt(SETTINGS_INDEX_LAG_THRESHOLD, 1024); - indexLagMaxWait = indexSettings.getSettings().getAsTime(SETTINGS_INDEX_LAG_MAX_WAIT, TimeValue.timeValueSeconds(30)); + indexLagThreshold = indexSettings.getSettings().getAsInt(SETTINGS_INDEX_LAG_THRESHOLD, DEFAULT_INDEX_LAG_THRESHOLD); + indexLagMaxWait = indexSettings.getSettings().getAsTime(SETTINGS_INDEX_LAG_MAX_WAIT, DEFAULT_INDEX_LAG_MAX_WAIT); processedSeqNo = new FixedBitSet(indexLagThreshold); } @@ -58,23 +60,12 @@ public long generateSeqNo() { } } - public void markSeqNoAsStarted(long seqNo) { + public long markSeqNoAsCompleted(long seqNo) { synchronized (mutex) { // make sure we track highest seen seqNo if (seqNo >= nextSeqNo) { nextSeqNo = seqNo + 1; } - if (seqNo <= checkpoint) { - // this is possible during recover where we might replay an op that was also replicated - return; - } - ensureCapacity(seqNo); - assert processedSeqNo.get(seqNoToOffset(seqNo)) == false : "expected [" + seqNo + "] not to be marked as started"; - } - } - - public long markSeqNoAsCompleted(long seqNo) { - synchronized (mutex) { if (seqNo <= checkpoint) { // this is possible during recover where we might replay an op that was also replicated return checkpoint; @@ -108,7 +99,7 @@ public long getMaxSeqNo() { private boolean hasCapacity(long seqNo) { assert Thread.holdsLock(mutex); - return (seqNo - checkpoint) < indexLagThreshold; + return (seqNo - checkpoint) <= indexLagThreshold; } private void ensureCapacity(long seqNo) { @@ -136,7 +127,7 @@ private void ensureCapacity(long seqNo) { } private int seqNoToOffset(long seqNo) { - assert seqNo - checkpoint < indexLagThreshold; + assert seqNo - checkpoint <= indexLagThreshold; assert seqNo > checkpoint; 
return (int) (seqNo % indexLagThreshold); } diff --git a/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java b/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java index 12a4e8bad5d50..fd6fa7387e687 100644 --- a/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java +++ b/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java @@ -42,11 +42,6 @@ public long generateSeqNo() { return localCheckpointService.generateSeqNo(); } - public void markSeqNoAsStarted(long seqNo) { - localCheckpointService.markSeqNoAsStarted(seqNo); - - } - public void markSeqNoAsCompleted(long seqNo) { localCheckpointService.markSeqNoAsCompleted(seqNo); } diff --git a/core/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java b/core/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java index f38092d131e34..d1bf85e1f2125 100644 --- a/core/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java +++ b/core/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java @@ -1398,6 +1398,53 @@ public void testIndexWriterInfoStream() { } } + public void testSeqNoAndLocalCheckpoint() { + int opCount = randomIntBetween(1, 10); + long seqNoCount = -1; + for (int op = 0; op < opCount; op++) { + final String id = randomFrom("1", "2", "3"); + ParsedDocument doc = testParsedDocument(id, id, "test", null, -1, -1, testDocumentWithTextField(), B_1, null); + if (randomBoolean()) { + final Engine.Index index = new Engine.Index(newUid(id), doc, + SequenceNumbersService.UNASSIGNED_SEQ_NO, + rarely() ? 
100 : Versions.MATCH_ANY, VersionType.INTERNAL, + PRIMARY, System.currentTimeMillis()); + + try { + engine.index(index); + } catch (VersionConflictEngineException e) { + // OK + } + if (index.seqNo() != SequenceNumbersService.UNASSIGNED_SEQ_NO) { + seqNoCount++; + Engine.Index replica = new Engine.Index(index.uid(), index.parsedDoc(), index.seqNo(), + index.version(), VersionType.EXTERNAL, REPLICA, System.currentTimeMillis()); + replicaEngine.index(replica); + } + } else { + final Engine.Delete delete = new Engine.Delete("test", id, newUid(id), + SequenceNumbersService.UNASSIGNED_SEQ_NO, + rarely() ? 100 : Versions.MATCH_ANY, VersionType.INTERNAL, + PRIMARY, System.currentTimeMillis(), false); + try { + engine.delete(delete); + } catch (VersionConflictEngineException e) { + // OK + } + if (delete.seqNo() != SequenceNumbersService.UNASSIGNED_SEQ_NO) { + seqNoCount++; + Engine.Delete replica = new Engine.Delete(delete.type(), delete.id(), delete.uid(), delete.seqNo(), + delete.version(), VersionType.EXTERNAL, REPLICA, System.currentTimeMillis(), false); + replicaEngine.delete(replica); + } + } + } + assertThat(engine.seqNoStats().getMaxSeqNo(), equalTo(seqNoCount)); + assertThat(engine.seqNoStats().getLocalCheckpoint(), equalTo(seqNoCount)); + assertThat(replicaEngine.seqNoStats().getMaxSeqNo(), equalTo(seqNoCount)); + assertThat(replicaEngine.seqNoStats().getLocalCheckpoint(), equalTo(seqNoCount)); + } + // #8603: make sure we can separately log IFD's messages public void testIndexWriterIFDInfoStream() { assumeFalse("who tests the tester?", VERBOSE); diff --git a/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointServiceTests.java b/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointServiceTests.java new file mode 100644 index 0000000000000..73e7f839d1a26 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointServiceTests.java @@ -0,0 +1,257 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * 
license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.seqno; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.util.concurrent.AbstractRunnable; +import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; +import org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.IndexSettingsModule; +import org.junit.Before; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.atomic.AtomicBoolean; + +import static org.hamcrest.Matchers.equalTo; + +public class LocalCheckpointServiceTests extends ESTestCase { + + LocalCheckpointService checkpointService; + + final int SMALL_INDEX_LAG_THRESHOLD = 10; + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + checkpointService = getCheckpointService(SMALL_INDEX_LAG_THRESHOLD, LocalCheckpointService.DEFAULT_INDEX_LAG_MAX_WAIT); + } + + protected LocalCheckpointService getCheckpointService(int thresholdLag, TimeValue thresholdDelay) { + return new 
LocalCheckpointService( + new ShardId("test", 0), + IndexSettingsModule.newIndexSettings("test", + Settings.builder() + .put(LocalCheckpointService.SETTINGS_INDEX_LAG_THRESHOLD, thresholdLag) + .put(LocalCheckpointService.SETTINGS_INDEX_LAG_MAX_WAIT, thresholdDelay) + .build() + )); + } + + public void testSimplePrimary() { + long seqNo1, seqNo2; + assertThat(checkpointService.getCheckpoint(), equalTo(SequenceNumbersService.UNASSIGNED_SEQ_NO)); + seqNo1 = checkpointService.generateSeqNo(); + assertThat(seqNo1, equalTo(0L)); + checkpointService.markSeqNoAsCompleted(seqNo1); + assertThat(checkpointService.getCheckpoint(), equalTo(0L)); + seqNo1 = checkpointService.generateSeqNo(); + seqNo2 = checkpointService.generateSeqNo(); + assertThat(seqNo1, equalTo(1L)); + assertThat(seqNo2, equalTo(2L)); + checkpointService.markSeqNoAsCompleted(seqNo2); + assertThat(checkpointService.getCheckpoint(), equalTo(0L)); + checkpointService.markSeqNoAsCompleted(seqNo1); + assertThat(checkpointService.getCheckpoint(), equalTo(2L)); + } + + public void testSimpleReplica() { + assertThat(checkpointService.getCheckpoint(), equalTo(SequenceNumbersService.UNASSIGNED_SEQ_NO)); + checkpointService.markSeqNoAsCompleted(0L); + assertThat(checkpointService.getCheckpoint(), equalTo(0L)); + checkpointService.markSeqNoAsCompleted(2L); + assertThat(checkpointService.getCheckpoint(), equalTo(0L)); + checkpointService.markSeqNoAsCompleted(1L); + assertThat(checkpointService.getCheckpoint(), equalTo(2L)); + } + + public void testIndexThrottleSuccessPrimary() throws Exception { + LocalCheckpointService checkpoint = getCheckpointService(3, TimeValue.timeValueHours(1)); + final long seq1 = checkpoint.generateSeqNo(); + final long seq2 = checkpoint.generateSeqNo(); + final long seq3 = checkpoint.generateSeqNo(); + final CountDownLatch threadStarted = new CountDownLatch(1); + final AtomicBoolean threadDone = new AtomicBoolean(false); + Thread backgroundThread = new Thread(() -> { + 
threadStarted.countDown(); + checkpoint.generateSeqNo(); + threadDone.set(true); + }, "testIndexDelayPrimary"); + backgroundThread.start(); + logger.info("--> waiting for thread to start"); + threadStarted.await(); + assertFalse("background thread finished but should have waited", threadDone.get()); + checkpoint.markSeqNoAsCompleted(seq2); + assertFalse("background thread finished but should have waited (seq2 completed)", threadDone.get()); + checkpoint.markSeqNoAsCompleted(seq1); + logger.info("--> waiting for thread to stop"); + assertBusy(() -> { + assertTrue("background thread should finished after finishing seq1", threadDone.get()); + }); + } + + public void testIndexThrottleTimeoutPrimary() throws Exception { + LocalCheckpointService checkpoint = getCheckpointService(2, TimeValue.timeValueMillis(100)); + checkpoint.generateSeqNo(); + checkpoint.generateSeqNo(); + try { + checkpoint.generateSeqNo(); + fail("index operation should time out due to a large lag"); + } catch (EsRejectedExecutionException e) { + // OK! 
+ } + } + + public void testIndexThrottleSuccessReplica() throws Exception { + LocalCheckpointService checkpoint = getCheckpointService(3, TimeValue.timeValueHours(1)); + final CountDownLatch threadStarted = new CountDownLatch(1); + final AtomicBoolean threadDone = new AtomicBoolean(false); + checkpoint.markSeqNoAsCompleted(1); + Thread backgroundThread = new Thread(() -> { + threadStarted.countDown(); + checkpoint.markSeqNoAsCompleted(3); + threadDone.set(true); + }, "testIndexDelayReplica"); + backgroundThread.start(); + logger.info("--> waiting for thread to start"); + threadStarted.await(); + assertFalse("background thread finished but should have waited", threadDone.get()); + checkpoint.markSeqNoAsCompleted(0); + logger.info("--> waiting for thread to stop"); + assertBusy(() -> { + assertTrue("background thread should finished after finishing seq1", threadDone.get()); + }); + } + + public void testIndexThrottleTimeoutReplica() throws Exception { + LocalCheckpointService checkpoint = getCheckpointService(1, TimeValue.timeValueMillis(100)); + try { + checkpoint.markSeqNoAsCompleted(1L); + fail("index operation should time out due to a large lag"); + } catch (EsRejectedExecutionException e) { + // OK! + } + checkpoint.markSeqNoAsCompleted(0L); + try { + checkpoint.markSeqNoAsCompleted(2L); + fail("index operation should time out due to a large lag"); + } catch (EsRejectedExecutionException e) { + // OK! 
+ } + + } + + public void testConcurrentPrimary() throws InterruptedException { + Thread[] threads = new Thread[randomIntBetween(2, 5)]; + final int opsPerThread = randomIntBetween(10, 20); + final int maxOps = opsPerThread * threads.length; + final long unFinisshedSeq = randomIntBetween(maxOps - SMALL_INDEX_LAG_THRESHOLD, maxOps - 2); // make sure we won't be blocked + logger.info("--> will run [{}] threads, maxOps [{}], unfinished seq no [{}]", threads.length, maxOps, unFinisshedSeq); + final CyclicBarrier barrier = new CyclicBarrier(threads.length); + for (int t = 0; t < threads.length; t++) { + final int threadId = t; + threads[t] = new Thread(new AbstractRunnable() { + @Override + public void onFailure(Throwable t) { + throw new ElasticsearchException("failure in background thread", t); + } + + @Override + protected void doRun() throws Exception { + barrier.await(); + for (int i = 0; i < opsPerThread; i++) { + long seqNo = checkpointService.generateSeqNo(); + logger.info("[t{}] started [{}]", threadId, seqNo); + if (seqNo != unFinisshedSeq) { + checkpointService.markSeqNoAsCompleted(seqNo); + logger.info("[t{}] completed [{}]", threadId, seqNo); + } + } + } + }, "testConcurrentPrimary_" + threadId); + threads[t].start(); + } + for (Thread thread : threads) { + thread.join(); + } + assertThat(checkpointService.getMaxSeqNo(), equalTo(maxOps - 1L)); + assertThat(checkpointService.getCheckpoint(), equalTo(unFinisshedSeq - 1L)); + checkpointService.markSeqNoAsCompleted(unFinisshedSeq); + assertThat(checkpointService.getCheckpoint(), equalTo(maxOps - 1L)); + } + + public void testConcurrentReplica() throws InterruptedException { + Thread[] threads = new Thread[randomIntBetween(2, 5)]; + final int opsPerThread = randomIntBetween(10, 20); + final int maxOps = opsPerThread * threads.length; + final long unFinisshedSeq = randomIntBetween(maxOps - SMALL_INDEX_LAG_THRESHOLD, maxOps - 2); // make sure we won't be blocked + Set seqNoList = new HashSet<>(); + for (int i = 0; 
i < maxOps; i++) { + seqNoList.add(i); + } + + final Integer[][] seqNoPerThread = new Integer[threads.length][]; + for (int t = 0; t < threads.length - 1; t++) { + int size = Math.min(seqNoList.size(), randomIntBetween(opsPerThread - 4, opsPerThread + 4)); + seqNoPerThread[t] = randomSubsetOf(size, seqNoList).toArray(new Integer[size]); + Arrays.sort(seqNoPerThread[t]); + seqNoList.removeAll(Arrays.asList(seqNoPerThread[t])); + } + seqNoPerThread[threads.length - 1] = seqNoList.toArray(new Integer[seqNoList.size()]); + logger.info("--> will run [{}] threads, maxOps [{}], unfinished seq no [{}]", threads.length, maxOps, unFinisshedSeq); + final CyclicBarrier barrier = new CyclicBarrier(threads.length); + for (int t = 0; t < threads.length; t++) { + final int threadId = t; + threads[t] = new Thread(new AbstractRunnable() { + @Override + public void onFailure(Throwable t) { + throw new ElasticsearchException("failure in background thread", t); + } + + @Override + protected void doRun() throws Exception { + barrier.await(); + Integer[] ops = seqNoPerThread[threadId]; + for (int seqNo : ops) { + if (seqNo != unFinisshedSeq) { + checkpointService.markSeqNoAsCompleted(seqNo); + logger.info("[t{}] completed [{}]", threadId, seqNo); + } + } + } + }, "testConcurrentPrimary_" + threadId); + threads[t].start(); + } + for (Thread thread : threads) { + thread.join(); + } + assertThat(checkpointService.getMaxSeqNo(), equalTo(maxOps - 1L)); + assertThat(checkpointService.getCheckpoint(), equalTo(unFinisshedSeq - 1L)); + checkpointService.markSeqNoAsCompleted(unFinisshedSeq); + assertThat(checkpointService.getCheckpoint(), equalTo(maxOps - 1L)); + } + +} \ No newline at end of file diff --git a/test-framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test-framework/src/main/java/org/elasticsearch/test/ESTestCase.java index c59c3ba4d4e6f..53f69b42ec1ca 100644 --- a/test-framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ 
b/test-framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -29,14 +29,12 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks; import com.carrotsearch.randomizedtesting.generators.RandomStrings; import com.carrotsearch.randomizedtesting.rules.TestRuleAdapter; - import org.apache.lucene.uninverting.UninvertingReader; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.TestRuleMarkFailure; import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TimeUnits; -import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.Version; import org.elasticsearch.bootstrap.BootstrapForTesting; import org.elasticsearch.cache.recycler.MockPageCacheRecycler; @@ -50,7 +48,6 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.MockBigArrays; import org.elasticsearch.common.util.concurrent.EsExecutors; -import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.env.Environment; import org.elasticsearch.env.NodeEnvironment; @@ -58,11 +55,7 @@ import org.elasticsearch.test.junit.listeners.LoggingListener; import org.elasticsearch.test.junit.listeners.ReproduceInfoPrinter; import org.elasticsearch.threadpool.ThreadPool; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Rule; +import org.junit.*; import org.junit.rules.RuleChain; import java.io.IOException; @@ -570,6 +563,19 @@ public static List randomSubsetOf(int size, T... 
values) { return list.subList(0, size); } + /** + * Returns size random values + */ + public static List randomSubsetOf(int size, Collection values) { + if (size > values.size()) { + throw new IllegalArgumentException("Can\'t pick " + size + " random objects from a list of " + values.size() + " objects"); + } + List list = new ArrayList<>(values); + Collections.shuffle(list); + return list.subList(0, size); + } + + /** * Returns true iff assertions for elasticsearch packages are enabled */ From 1ef8a0d342492629ba1a647574440b299a81b279 Mon Sep 17 00:00:00 2001 From: Boaz Leskes Date: Mon, 30 Nov 2015 14:56:20 +0100 Subject: [PATCH 09/12] Java Docs! --- .../index/seqno/LocalCheckpointService.java | 37 +++++++++++++++++++ .../elasticsearch/index/seqno/SeqNoStats.java | 2 + .../index/seqno/SequenceNumbersService.java | 7 ++++ 3 files changed, 46 insertions(+) diff --git a/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java b/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java index 49cdb7d921fdd..8dbfdd8d8f743 100644 --- a/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java +++ b/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java @@ -26,20 +26,43 @@ import org.elasticsearch.index.shard.AbstractIndexShardComponent; import org.elasticsearch.index.shard.ShardId; +/** + * This class generates sequences numbers and keeps track of the so called local checkpoint - the highest number for which + * all previous seqNo have been processed (including) + */ public class LocalCheckpointService extends AbstractIndexShardComponent { + /** sets the maximum spread between lowest and highest seq no in flight */ public static String SETTINGS_INDEX_LAG_THRESHOLD = "index.seq_no.index_lag.threshold"; + + /** + * how long should an incoming indexing request which violates {@link #SETTINGS_INDEX_LAG_THRESHOLD } should wait + * before being rejected + */ public static String 
SETTINGS_INDEX_LAG_MAX_WAIT = "index.seq_no.index_lag.max_wait"; + + /** default value for {@link #SETTINGS_INDEX_LAG_THRESHOLD} */ final static int DEFAULT_INDEX_LAG_THRESHOLD = 1024; + + /** default value for {@link #SETTINGS_INDEX_LAG_MAX_WAIT} */ final static TimeValue DEFAULT_INDEX_LAG_MAX_WAIT = TimeValue.timeValueSeconds(30); + /** protects changes to all internal state and signals changes in {@link #checkpoint} */ final Object mutex = new Object(); + + /** each bits maps to a seqNo in round robin fashion. a set bit means the seqNo has been processed */ final FixedBitSet processedSeqNo; + + /** value of {@link #SETTINGS_INDEX_LAG_THRESHOLD } */ final int indexLagThreshold; + /** value of {#link #SETTINGS_INDEX_LAG_THRESHOLD } */ final TimeValue indexLagMaxWait; + /** the next available seqNo - used for seqNo generation */ volatile long nextSeqNo = 0; + + /** the current local checkpoint, i.e., all seqNo lower<= this number have been completed */ volatile long checkpoint = -1; public LocalCheckpointService(ShardId shardId, IndexSettings indexSettings) { @@ -50,6 +73,11 @@ public LocalCheckpointService(ShardId shardId, IndexSettings indexSettings) { } + /** + * issue the next sequence number + * + * Note that this method can block to honour maximum indexing lag . See {@link #SETTINGS_INDEX_LAG_THRESHOLD } + **/ public long generateSeqNo() { synchronized (mutex) { // we have to keep checking when ensure capacity returns because it release the lock and nextSeqNo may change @@ -60,6 +88,10 @@ public long generateSeqNo() { } } + /** + * marks the processing of the given seqNo have been completed + * Note that this method can block to honour maximum indexing lag . 
See {@link #SETTINGS_INDEX_LAG_THRESHOLD } + **/ public long markSeqNoAsCompleted(long seqNo) { synchronized (mutex) { // make sure we track highest seen seqNo @@ -88,20 +120,24 @@ public long markSeqNoAsCompleted(long seqNo) { return checkpoint; } + /** get's the current check point */ public long getCheckpoint() { return checkpoint; } + /** get's the maximum seqno seen so far */ public long getMaxSeqNo() { return nextSeqNo - 1; } + /** checks if seqNo violates {@link #SETTINGS_INDEX_LAG_THRESHOLD } */ private boolean hasCapacity(long seqNo) { assert Thread.holdsLock(mutex); return (seqNo - checkpoint) <= indexLagThreshold; } + /** blocks until {@link #SETTINGS_INDEX_LAG_THRESHOLD } is honoured or raises {@link EsRejectedExecutionException }*/ private void ensureCapacity(long seqNo) { assert Thread.holdsLock(mutex); long retry = 0; @@ -126,6 +162,7 @@ private void ensureCapacity(long seqNo) { } } + /** maps the given seqNo to a position in {@link #processedSeqNo} */ private int seqNoToOffset(long seqNo) { assert seqNo - checkpoint <= indexLagThreshold; assert seqNo > checkpoint; diff --git a/core/src/main/java/org/elasticsearch/index/seqno/SeqNoStats.java b/core/src/main/java/org/elasticsearch/index/seqno/SeqNoStats.java index a2052f75976a0..faf93eb276607 100644 --- a/core/src/main/java/org/elasticsearch/index/seqno/SeqNoStats.java +++ b/core/src/main/java/org/elasticsearch/index/seqno/SeqNoStats.java @@ -39,10 +39,12 @@ public SeqNoStats(long maxSeqNo, long localCheckpoint) { this.localCheckpoint = localCheckpoint; } + /** the maximum sequence number seen so far */ public long getMaxSeqNo() { return maxSeqNo; } + /** the maximum sequence number for which all previous operations (including) have been completed */ public long getLocalCheckpoint() { return localCheckpoint; } diff --git a/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java b/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java index 
fd6fa7387e687..3ef8607c4c230 100644 --- a/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java +++ b/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java @@ -42,10 +42,17 @@ public long generateSeqNo() { return localCheckpointService.generateSeqNo(); } + /** + * marks the given seqNo as completed. See {@link LocalCheckpointService#markSeqNoAsCompleted(long)} + * more details + */ public void markSeqNoAsCompleted(long seqNo) { localCheckpointService.markSeqNoAsCompleted(seqNo); } + /** + * Gets sequence number related stats + */ public SeqNoStats stats() { return new SeqNoStats(localCheckpointService.getMaxSeqNo(), localCheckpointService.getCheckpoint()); } From 4edb7aafc70a073f1c5c5694ed82be72095d2b21 Mon Sep 17 00:00:00 2001 From: Boaz Leskes Date: Mon, 30 Nov 2015 15:00:00 +0100 Subject: [PATCH 10/12] tweak --- .../elasticsearch/action/admin/indices/stats/ShardStats.java | 2 +- .../java/org/elasticsearch/common/io/stream/StreamOutput.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/stats/ShardStats.java b/core/src/main/java/org/elasticsearch/action/admin/indices/stats/ShardStats.java index 3b1ff2e538de8..e921c13b979b6 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/stats/ShardStats.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/stats/ShardStats.java @@ -118,7 +118,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(statePath); out.writeString(dataPath); out.writeBoolean(isCustomDataPath); - out.writeOptionalStreamable(seqNoStats); + out.writeOptionalWritable(seqNoStats); } @Override diff --git a/core/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java b/core/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java index 0285e56355792..d5e96d9b0dbdb 100644 --- a/core/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java +++ 
b/core/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java @@ -506,7 +506,7 @@ public void writeOptionalStreamable(@Nullable Streamable streamable) throws IOEx /** * Serializes a potential null value. */ - public void writeOptionalStreamable(@Nullable Writeable writeable) throws IOException { + public void writeOptionalWritable(@Nullable Writeable writeable) throws IOException { if (writeable != null) { writeBoolean(true); writeable.writeTo(this); From 0b50630e4e69fb86aead55230a6a34df728891f7 Mon Sep 17 00:00:00 2001 From: Boaz Leskes Date: Thu, 10 Dec 2015 23:26:11 +0100 Subject: [PATCH 11/12] Another checkpoint implementation --- .../metadata/MetaDataIndexUpgradeService.java | 2 +- .../index/seqno/LocalCheckpointService.java | 159 +++++++----------- .../seqno/LocalCheckpointServiceTests.java | 133 ++++----------- 3 files changed, 94 insertions(+), 200 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java b/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java index 70a492954d120..510faebcbdad4 100644 --- a/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java +++ b/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java @@ -151,7 +151,7 @@ private static boolean isSupportedVersion(IndexMetaData indexMetaData) { "index.translog.interval", "index.translog.sync_interval", "index.shard.inactive_time", - LocalCheckpointService.SETTINGS_INDEX_LAG_MAX_WAIT, + LocalCheckpointService.SETTINGS_BIT_ARRAY_CHUNK_SIZE, UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING)); /** diff --git a/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java b/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java index 8dbfdd8d8f743..b3a4179417f64 100644 --- a/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java +++ 
b/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java @@ -19,105 +19,66 @@ package org.elasticsearch.index.seqno; import org.apache.lucene.util.FixedBitSet; -import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.shard.AbstractIndexShardComponent; import org.elasticsearch.index.shard.ShardId; +import java.util.LinkedList; + /** * This class generates sequences numbers and keeps track of the so called local checkpoint - the highest number for which * all previous seqNo have been processed (including) */ public class LocalCheckpointService extends AbstractIndexShardComponent { - /** sets the maximum spread between lowest and highest seq no in flight */ - public static String SETTINGS_INDEX_LAG_THRESHOLD = "index.seq_no.index_lag.threshold"; - - /** - * how long should an incoming indexing request which violates {@link #SETTINGS_INDEX_LAG_THRESHOLD } should wait - * before being rejected - */ - public static String SETTINGS_INDEX_LAG_MAX_WAIT = "index.seq_no.index_lag.max_wait"; - - /** default value for {@link #SETTINGS_INDEX_LAG_THRESHOLD} */ - final static int DEFAULT_INDEX_LAG_THRESHOLD = 1024; - - /** default value for {@link #SETTINGS_INDEX_LAG_MAX_WAIT} */ - final static TimeValue DEFAULT_INDEX_LAG_MAX_WAIT = TimeValue.timeValueSeconds(30); + public static String SETTINGS_BIT_ARRAY_CHUNK_SIZE = "index.seq_no.checkpoint.bit_array_chunk_size"; - /** protects changes to all internal state and signals changes in {@link #checkpoint} */ - final Object mutex = new Object(); + /** default value for {@link #SETTINGS_BIT_ARRAY_CHUNK_SIZE} */ + final static int DEFAULT_BIT_ARRAY_CHUNK_SIZE = 1024; - /** each bits maps to a seqNo in round robin fashion. 
a set bit means the seqNo has been processed */ - final FixedBitSet processedSeqNo; - /** value of {@link #SETTINGS_INDEX_LAG_THRESHOLD } */ - final int indexLagThreshold; - /** value of {#link #SETTINGS_INDEX_LAG_THRESHOLD } */ - final TimeValue indexLagMaxWait; + final LinkedList processedSeqNo; + final int processedSeqNoChunkSize; + long minSeqNoInProcessSeqNo = 0; + /** the current local checkpoint, i.e., all seqNo lower<= this number have been completed */ + volatile long checkpoint = -1; /** the next available seqNo - used for seqNo generation */ volatile long nextSeqNo = 0; - /** the current local checkpoint, i.e., all seqNo lower<= this number have been completed */ - volatile long checkpoint = -1; public LocalCheckpointService(ShardId shardId, IndexSettings indexSettings) { super(shardId, indexSettings); - indexLagThreshold = indexSettings.getSettings().getAsInt(SETTINGS_INDEX_LAG_THRESHOLD, DEFAULT_INDEX_LAG_THRESHOLD); - indexLagMaxWait = indexSettings.getSettings().getAsTime(SETTINGS_INDEX_LAG_MAX_WAIT, DEFAULT_INDEX_LAG_MAX_WAIT); - processedSeqNo = new FixedBitSet(indexLagThreshold); - + processedSeqNoChunkSize = indexSettings.getSettings().getAsInt(SETTINGS_BIT_ARRAY_CHUNK_SIZE, DEFAULT_BIT_ARRAY_CHUNK_SIZE); + processedSeqNo = new LinkedList<>(); } /** * issue the next sequence number - * - * Note that this method can block to honour maximum indexing lag . See {@link #SETTINGS_INDEX_LAG_THRESHOLD } **/ - public long generateSeqNo() { - synchronized (mutex) { - // we have to keep checking when ensure capacity returns because it release the lock and nextSeqNo may change - while (hasCapacity(nextSeqNo) == false) { - ensureCapacity(nextSeqNo); - } - return nextSeqNo++; - } + public synchronized long generateSeqNo() { + return nextSeqNo++; } /** * marks the processing of the given seqNo have been completed - * Note that this method can block to honour maximum indexing lag . 
See {@link #SETTINGS_INDEX_LAG_THRESHOLD } **/ - public long markSeqNoAsCompleted(long seqNo) { - synchronized (mutex) { - // make sure we track highest seen seqNo - if (seqNo >= nextSeqNo) { - nextSeqNo = seqNo + 1; - } - if (seqNo <= checkpoint) { - // this is possible during recover where we might replay an op that was also replicated - return checkpoint; - } - // just to be safe (previous calls to generateSeqNo/markSeqNoAsStarted should ensure this is OK) - ensureCapacity(seqNo); - int offset = seqNoToOffset(seqNo); - processedSeqNo.set(offset); - if (seqNo == checkpoint + 1) { - do { - // clear the flag as we are making it free for future operations. do se before we expose it - // by moving the checkpoint - processedSeqNo.clear(offset); - checkpoint++; - offset = seqNoToOffset(checkpoint + 1); - } while (processedSeqNo.get(offset)); - mutex.notifyAll(); - } + public synchronized void markSeqNoAsCompleted(long seqNo) { + // make sure we track highest seen seqNo + if (seqNo >= nextSeqNo) { + nextSeqNo = seqNo + 1; + } + if (seqNo <= checkpoint) { + // this is possible during recover where we might replay an op that was also replicated + return; + } + FixedBitSet bitSet = getBitSetForSeqNo(seqNo); + int offset = seqNoToBitSetOffset(seqNo); + bitSet.set(offset); + if (seqNo == checkpoint + 1) { + updateCheckpoint(); } - return checkpoint; } /** get's the current check point */ @@ -130,43 +91,41 @@ public long getMaxSeqNo() { return nextSeqNo - 1; } - - /** checks if seqNo violates {@link #SETTINGS_INDEX_LAG_THRESHOLD } */ - private boolean hasCapacity(long seqNo) { - assert Thread.holdsLock(mutex); - return (seqNo - checkpoint) <= indexLagThreshold; + private void updateCheckpoint() { + assert Thread.holdsLock(this); + assert checkpoint - minSeqNoInProcessSeqNo < processedSeqNoChunkSize : "checkpoint to minSeqNoInProcessSeqNo is larger then a bit set"; + assert getBitSetForSeqNo(checkpoint + 1).get(seqNoToBitSetOffset(checkpoint + 1)) : "updateCheckpoint is called 
but the bit following the checkpoint is not set"; + assert getBitSetForSeqNo(checkpoint + 1) == processedSeqNo.getFirst() : "checkpoint + 1 doesn't point to the first bit set"; + // keep it simple for now, get the checkpoint one by one. in the future we can optimize and read words + FixedBitSet current = processedSeqNo.getFirst(); + do { + checkpoint++; + // the checkpoint always falls in the first bit set or just before. If it falls + // on the last bit of the current bit set, we can clean it. + if (checkpoint == minSeqNoInProcessSeqNo + processedSeqNoChunkSize - 1) { + processedSeqNo.pop(); + minSeqNoInProcessSeqNo += processedSeqNoChunkSize; + assert checkpoint - minSeqNoInProcessSeqNo < processedSeqNoChunkSize; + current = processedSeqNo.peekFirst(); + } + } while (current != null && current.get(seqNoToBitSetOffset(checkpoint + 1))); } - /** blocks until {@link #SETTINGS_INDEX_LAG_THRESHOLD } is honoured or raises {@link EsRejectedExecutionException }*/ - private void ensureCapacity(long seqNo) { - assert Thread.holdsLock(mutex); - long retry = 0; - final long maxRetries = indexLagMaxWait.seconds(); - while (hasCapacity(seqNo) == false) { - try { - if (retry > maxRetries) { - ElasticsearchException e = new EsRejectedExecutionException("indexing lag exceeds [{}] (seq# requested [{}], local checkpoint [{}]", - indexLagThreshold, seqNo, checkpoint); - e.setShard(shardId()); - throw e; - } - - // this temporary releases the lock on mutex - mutex.wait(Math.min(1000, indexLagMaxWait.millis() - retry * 1000)); - retry++; - } catch (InterruptedException ie) { - ElasticsearchException exp = new ElasticsearchException("interrupted while waiting on index lag"); - exp.setShard(shardId()); - throw exp; - } + private FixedBitSet getBitSetForSeqNo(long seqNo) { + assert Thread.holdsLock(this); + assert seqNo >= minSeqNoInProcessSeqNo; + int bitSetOffset = ((int) (seqNo - minSeqNoInProcessSeqNo)) / processedSeqNoChunkSize; + while (bitSetOffset >= processedSeqNo.size()) { + 
processedSeqNo.add(new FixedBitSet(processedSeqNoChunkSize)); } + return processedSeqNo.get(bitSetOffset); } - /** maps the given seqNo to a position in {@link #processedSeqNo} */ - private int seqNoToOffset(long seqNo) { - assert seqNo - checkpoint <= indexLagThreshold; - assert seqNo > checkpoint; - return (int) (seqNo % indexLagThreshold); - } + /** maps the given seqNo to a position in the bit set returned by {@link #getBitSetForSeqNo} */ + private int seqNoToBitSetOffset(long seqNo) { + assert Thread.holdsLock(this); + assert seqNo >= minSeqNoInProcessSeqNo; + return ((int) (seqNo - minSeqNoInProcessSeqNo)) % processedSeqNoChunkSize; + } } diff --git a/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointServiceTests.java b/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointServiceTests.java index 73e7f839d1a26..53c1de7dd5208 100644 --- a/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointServiceTests.java +++ b/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointServiceTests.java @@ -20,45 +20,39 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.AbstractRunnable; -import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.IndexSettingsModule; import org.junit.Before; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; -import java.util.concurrent.CountDownLatch; +import java.util.*; import java.util.concurrent.CyclicBarrier; -import java.util.concurrent.atomic.AtomicBoolean; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.isOneOf; public class LocalCheckpointServiceTests extends ESTestCase { LocalCheckpointService checkpointService; - final int SMALL_INDEX_LAG_THRESHOLD = 
10; + final int SMALL_CHUNK_SIZE = 4; @Override @Before public void setUp() throws Exception { super.setUp(); - checkpointService = getCheckpointService(SMALL_INDEX_LAG_THRESHOLD, LocalCheckpointService.DEFAULT_INDEX_LAG_MAX_WAIT); + checkpointService = getCheckpointService(); } - protected LocalCheckpointService getCheckpointService(int thresholdLag, TimeValue thresholdDelay) { + protected LocalCheckpointService getCheckpointService() { return new LocalCheckpointService( - new ShardId("test", 0), - IndexSettingsModule.newIndexSettings("test", - Settings.builder() - .put(LocalCheckpointService.SETTINGS_INDEX_LAG_THRESHOLD, thresholdLag) - .put(LocalCheckpointService.SETTINGS_INDEX_LAG_MAX_WAIT, thresholdDelay) - .build() - )); + new ShardId("test", 0), + IndexSettingsModule.newIndexSettings("test", + Settings.builder() + .put(LocalCheckpointService.SETTINGS_BIT_ARRAY_CHUNK_SIZE, SMALL_CHUNK_SIZE) + .build() + )); } public void testSimplePrimary() { @@ -88,88 +82,28 @@ public void testSimpleReplica() { assertThat(checkpointService.getCheckpoint(), equalTo(2L)); } - public void testIndexThrottleSuccessPrimary() throws Exception { - LocalCheckpointService checkpoint = getCheckpointService(3, TimeValue.timeValueHours(1)); - final long seq1 = checkpoint.generateSeqNo(); - final long seq2 = checkpoint.generateSeqNo(); - final long seq3 = checkpoint.generateSeqNo(); - final CountDownLatch threadStarted = new CountDownLatch(1); - final AtomicBoolean threadDone = new AtomicBoolean(false); - Thread backgroundThread = new Thread(() -> { - threadStarted.countDown(); - checkpoint.generateSeqNo(); - threadDone.set(true); - }, "testIndexDelayPrimary"); - backgroundThread.start(); - logger.info("--> waiting for thread to start"); - threadStarted.await(); - assertFalse("background thread finished but should have waited", threadDone.get()); - checkpoint.markSeqNoAsCompleted(seq2); - assertFalse("background thread finished but should have waited (seq2 completed)", threadDone.get()); 
- checkpoint.markSeqNoAsCompleted(seq1); - logger.info("--> waiting for thread to stop"); - assertBusy(() -> { - assertTrue("background thread should finished after finishing seq1", threadDone.get()); - }); - } + public void testSimpleOverFlow() { + List seqNoList = new ArrayList<>(); + final boolean aligned = randomBoolean(); + final int maxOps = SMALL_CHUNK_SIZE * randomIntBetween(1, 5) + (aligned ? 0 : randomIntBetween(1, SMALL_CHUNK_SIZE - 1)); - public void testIndexThrottleTimeoutPrimary() throws Exception { - LocalCheckpointService checkpoint = getCheckpointService(2, TimeValue.timeValueMillis(100)); - checkpoint.generateSeqNo(); - checkpoint.generateSeqNo(); - try { - checkpoint.generateSeqNo(); - fail("index operation should time out due to a large lag"); - } catch (EsRejectedExecutionException e) { - // OK! - } - } - - public void testIndexThrottleSuccessReplica() throws Exception { - LocalCheckpointService checkpoint = getCheckpointService(3, TimeValue.timeValueHours(1)); - final CountDownLatch threadStarted = new CountDownLatch(1); - final AtomicBoolean threadDone = new AtomicBoolean(false); - checkpoint.markSeqNoAsCompleted(1); - Thread backgroundThread = new Thread(() -> { - threadStarted.countDown(); - checkpoint.markSeqNoAsCompleted(3); - threadDone.set(true); - }, "testIndexDelayReplica"); - backgroundThread.start(); - logger.info("--> waiting for thread to start"); - threadStarted.await(); - assertFalse("background thread finished but should have waited", threadDone.get()); - checkpoint.markSeqNoAsCompleted(0); - logger.info("--> waiting for thread to stop"); - assertBusy(() -> { - assertTrue("background thread should finished after finishing seq1", threadDone.get()); - }); - } - - public void testIndexThrottleTimeoutReplica() throws Exception { - LocalCheckpointService checkpoint = getCheckpointService(1, TimeValue.timeValueMillis(100)); - try { - checkpoint.markSeqNoAsCompleted(1L); - fail("index operation should time out due to a large lag"); - 
} catch (EsRejectedExecutionException e) { - // OK! + for (int i = 0; i < maxOps; i++) { + seqNoList.add(i); } - checkpoint.markSeqNoAsCompleted(0L); - try { - checkpoint.markSeqNoAsCompleted(2L); - fail("index operation should time out due to a large lag"); - } catch (EsRejectedExecutionException e) { - // OK! + Collections.shuffle(seqNoList, random()); + for (Integer seqNo : seqNoList) { + checkpointService.markSeqNoAsCompleted(seqNo); } - + assertThat(checkpointService.checkpoint, equalTo(maxOps - 1L)); + assertThat(checkpointService.processedSeqNo.size(), equalTo(aligned ? 0 : 1)); } public void testConcurrentPrimary() throws InterruptedException { Thread[] threads = new Thread[randomIntBetween(2, 5)]; final int opsPerThread = randomIntBetween(10, 20); final int maxOps = opsPerThread * threads.length; - final long unFinisshedSeq = randomIntBetween(maxOps - SMALL_INDEX_LAG_THRESHOLD, maxOps - 2); // make sure we won't be blocked - logger.info("--> will run [{}] threads, maxOps [{}], unfinished seq no [{}]", threads.length, maxOps, unFinisshedSeq); + final long unFinishedSeq = randomIntBetween(0, maxOps - 2); // make sure we always index the last seqNo to simplify maxSeq checks + logger.info("--> will run [{}] threads, maxOps [{}], unfinished seq no [{}]", threads.length, maxOps, unFinishedSeq); final CyclicBarrier barrier = new CyclicBarrier(threads.length); for (int t = 0; t < threads.length; t++) { final int threadId = t; @@ -185,7 +119,7 @@ protected void doRun() throws Exception { for (int i = 0; i < opsPerThread; i++) { long seqNo = checkpointService.generateSeqNo(); logger.info("[t{}] started [{}]", threadId, seqNo); - if (seqNo != unFinisshedSeq) { + if (seqNo != unFinishedSeq) { checkpointService.markSeqNoAsCompleted(seqNo); logger.info("[t{}] completed [{}]", threadId, seqNo); } @@ -198,16 +132,17 @@ protected void doRun() throws Exception { thread.join(); } assertThat(checkpointService.getMaxSeqNo(), equalTo(maxOps - 1L)); - 
assertThat(checkpointService.getCheckpoint(), equalTo(unFinisshedSeq - 1L)); - checkpointService.markSeqNoAsCompleted(unFinisshedSeq); + assertThat(checkpointService.getCheckpoint(), equalTo(unFinishedSeq - 1L)); + checkpointService.markSeqNoAsCompleted(unFinishedSeq); assertThat(checkpointService.getCheckpoint(), equalTo(maxOps - 1L)); + assertThat(checkpointService.processedSeqNo.size(), isOneOf(0, 1)); } public void testConcurrentReplica() throws InterruptedException { Thread[] threads = new Thread[randomIntBetween(2, 5)]; final int opsPerThread = randomIntBetween(10, 20); final int maxOps = opsPerThread * threads.length; - final long unFinisshedSeq = randomIntBetween(maxOps - SMALL_INDEX_LAG_THRESHOLD, maxOps - 2); // make sure we won't be blocked + final long unFinishedSeq = randomIntBetween(0, maxOps - 2); // make sure we always index the last seqNo to simplify maxSeq checks Set seqNoList = new HashSet<>(); for (int i = 0; i < maxOps; i++) { seqNoList.add(i); @@ -221,7 +156,7 @@ public void testConcurrentReplica() throws InterruptedException { seqNoList.removeAll(Arrays.asList(seqNoPerThread[t])); } seqNoPerThread[threads.length - 1] = seqNoList.toArray(new Integer[seqNoList.size()]); - logger.info("--> will run [{}] threads, maxOps [{}], unfinished seq no [{}]", threads.length, maxOps, unFinisshedSeq); + logger.info("--> will run [{}] threads, maxOps [{}], unfinished seq no [{}]", threads.length, maxOps, unFinishedSeq); final CyclicBarrier barrier = new CyclicBarrier(threads.length); for (int t = 0; t < threads.length; t++) { final int threadId = t; @@ -236,7 +171,7 @@ protected void doRun() throws Exception { barrier.await(); Integer[] ops = seqNoPerThread[threadId]; for (int seqNo : ops) { - if (seqNo != unFinisshedSeq) { + if (seqNo != unFinishedSeq) { checkpointService.markSeqNoAsCompleted(seqNo); logger.info("[t{}] completed [{}]", threadId, seqNo); } @@ -249,9 +184,9 @@ protected void doRun() throws Exception { thread.join(); } 
assertThat(checkpointService.getMaxSeqNo(), equalTo(maxOps - 1L)); - assertThat(checkpointService.getCheckpoint(), equalTo(unFinisshedSeq - 1L)); - checkpointService.markSeqNoAsCompleted(unFinisshedSeq); + assertThat(checkpointService.getCheckpoint(), equalTo(unFinishedSeq - 1L)); + checkpointService.markSeqNoAsCompleted(unFinishedSeq); assertThat(checkpointService.getCheckpoint(), equalTo(maxOps - 1L)); } -} \ No newline at end of file +} From 7f49c1a6ecec69a511dc5534959e696b961b71a1 Mon Sep 17 00:00:00 2001 From: Boaz Leskes Date: Fri, 11 Dec 2015 09:46:07 +0100 Subject: [PATCH 12/12] feedback --- .../admin/indices/stats/ShardStats.java | 2 +- .../metadata/MetaDataIndexUpgradeService.java | 2 +- .../common/io/stream/StreamInput.java | 2 +- .../index/seqno/LocalCheckpointService.java | 50 +++++++++++++------ .../elasticsearch/index/seqno/SeqNoStats.java | 10 ++-- .../seqno/LocalCheckpointServiceTests.java | 2 +- .../org/elasticsearch/test/ESTestCase.java | 11 ++-- 7 files changed, 47 insertions(+), 32 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/stats/ShardStats.java b/core/src/main/java/org/elasticsearch/action/admin/indices/stats/ShardStats.java index e921c13b979b6..81586f0fa7cae 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/stats/ShardStats.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/stats/ShardStats.java @@ -107,7 +107,7 @@ public void readFrom(StreamInput in) throws IOException { statePath = in.readString(); dataPath = in.readString(); isCustomDataPath = in.readBoolean(); - seqNoStats = in.readOptionalStreamableReader(SeqNoStats.PROTOTYPE); + seqNoStats = in.readOptionalStreamableReader(SeqNoStats::new); } @Override diff --git a/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java b/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java index 510faebcbdad4..6c1d41da98301 100644 --- 
a/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java +++ b/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java @@ -151,7 +151,7 @@ private static boolean isSupportedVersion(IndexMetaData indexMetaData) { "index.translog.interval", "index.translog.sync_interval", "index.shard.inactive_time", - LocalCheckpointService.SETTINGS_BIT_ARRAY_CHUNK_SIZE, + LocalCheckpointService.SETTINGS_BIT_ARRAYS_SIZE, UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING)); /** diff --git a/core/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java b/core/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java index 23d414c851909..4a47aa65f6754 100644 --- a/core/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java +++ b/core/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java @@ -536,7 +536,7 @@ public T readOptionalStreamable(Supplier supplier) thr /** * Serializes a potential null value. */ - public > T readOptionalStreamableReader(StreamableReader streamableReader) throws IOException { + public T readOptionalStreamableReader(StreamableReader streamableReader) throws IOException { if (readBoolean()) { return streamableReader.readFrom(this); } else { diff --git a/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java b/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java index b3a4179417f64..a1835171802c2 100644 --- a/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java +++ b/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java @@ -31,15 +31,23 @@ */ public class LocalCheckpointService extends AbstractIndexShardComponent { - public static String SETTINGS_BIT_ARRAY_CHUNK_SIZE = "index.seq_no.checkpoint.bit_array_chunk_size"; + /** + * we keep a bit for each seq No that is still pending. 
to optimize allocation, we do so in multiple arrays + allocating them on demand and cleaning up when completed. This setting controls the size of the arrays + */ + public static String SETTINGS_BIT_ARRAYS_SIZE = "index.seq_no.checkpoint.bit_arrays_size"; - /** default value for {@link #SETTINGS_BIT_ARRAY_CHUNK_SIZE} */ - final static int DEFAULT_BIT_ARRAY_CHUNK_SIZE = 1024; + /** default value for {@link #SETTINGS_BIT_ARRAYS_SIZE} */ + final static int DEFAULT_BIT_ARRAYS_SIZE = 1024; + /** + * an ordered list of bit arrays representing pending seq nos. The list is "anchored" in {@link #firstSeqNoInProcessSeqNo} + * which marks the seqNo the first bit in the first array corresponds to. + */ final LinkedList processedSeqNo; - final int processedSeqNoChunkSize; - long minSeqNoInProcessSeqNo = 0; + final int bitArraysSize; + long firstSeqNoInProcessSeqNo = 0; /** the current local checkpoint, i.e., all seqNo lower<= this number have been completed */ volatile long checkpoint = -1; @@ -50,7 +58,10 @@ public class LocalCheckpointService extends AbstractIndexShardComponent { public LocalCheckpointService(ShardId shardId, IndexSettings indexSettings) { super(shardId, indexSettings); - processedSeqNoChunkSize = indexSettings.getSettings().getAsInt(SETTINGS_BIT_ARRAY_CHUNK_SIZE, DEFAULT_BIT_ARRAY_CHUNK_SIZE); + bitArraysSize = indexSettings.getSettings().getAsInt(SETTINGS_BIT_ARRAYS_SIZE, DEFAULT_BIT_ARRAYS_SIZE); + if (bitArraysSize <= 0) { + throw new IllegalArgumentException("[" + SETTINGS_BIT_ARRAYS_SIZE + "] must be positive. 
got [" + bitArraysSize + "]"); + } processedSeqNo = new LinkedList<>(); } @@ -70,7 +81,7 @@ public synchronized void markSeqNoAsCompleted(long seqNo) { nextSeqNo = seqNo + 1; } if (seqNo <= checkpoint) { - // this is possible during recover where we might replay an op that was also replicated + // this is possible during recovery where we might replay an op that was also replicated return; } FixedBitSet bitSet = getBitSetForSeqNo(seqNo); @@ -91,9 +102,13 @@ public long getMaxSeqNo() { return nextSeqNo - 1; } + /** + * moves the checkpoint to the last consecutively processed seqNo + * Note: this method assumes that the seqNo following the current checkpoint is processed. + */ private void updateCheckpoint() { assert Thread.holdsLock(this); - assert checkpoint - minSeqNoInProcessSeqNo < processedSeqNoChunkSize : "checkpoint to minSeqNoInProcessSeqNo is larger then a bit set"; + assert checkpoint - firstSeqNoInProcessSeqNo < bitArraysSize : "checkpoint to firstSeqNoInProcessSeqNo is larger than a bit set"; assert getBitSetForSeqNo(checkpoint + 1).get(seqNoToBitSetOffset(checkpoint + 1)) : "updateCheckpoint is called but the bit following the checkpoint is not set"; assert getBitSetForSeqNo(checkpoint + 1) == processedSeqNo.getFirst() : "checkpoint + 1 doesn't point to the first bit set"; // keep it simple for now, get the checkpoint one by one. in the future we can optimize and read words @@ -102,21 +117,24 @@ private void updateCheckpoint() { checkpoint++; // the checkpoint always falls in the first bit set or just before. If it falls // on the last bit of the current bit set, we can clean it. 
- if (checkpoint == minSeqNoInProcessSeqNo + processedSeqNoChunkSize - 1) { + if (checkpoint == firstSeqNoInProcessSeqNo + bitArraysSize - 1) { processedSeqNo.pop(); - minSeqNoInProcessSeqNo += processedSeqNoChunkSize; - assert checkpoint - minSeqNoInProcessSeqNo < processedSeqNoChunkSize; + firstSeqNoInProcessSeqNo += bitArraysSize; + assert checkpoint - firstSeqNoInProcessSeqNo < bitArraysSize; current = processedSeqNo.peekFirst(); } } while (current != null && current.get(seqNoToBitSetOffset(checkpoint + 1))); } + /** + * gets the bit array for the given seqNo, allocating new ones if needed. + */ private FixedBitSet getBitSetForSeqNo(long seqNo) { assert Thread.holdsLock(this); - assert seqNo >= minSeqNoInProcessSeqNo; - int bitSetOffset = ((int) (seqNo - minSeqNoInProcessSeqNo)) / processedSeqNoChunkSize; + assert seqNo >= firstSeqNoInProcessSeqNo; + int bitSetOffset = ((int) (seqNo - firstSeqNoInProcessSeqNo)) / bitArraysSize; while (bitSetOffset >= processedSeqNo.size()) { - processedSeqNo.add(new FixedBitSet(processedSeqNoChunkSize)); + processedSeqNo.add(new FixedBitSet(bitArraysSize)); } return processedSeqNo.get(bitSetOffset); } @@ -125,7 +143,7 @@ private FixedBitSet getBitSetForSeqNo(long seqNo) { /** maps the given seqNo to a position in the bit set returned by {@link #getBitSetForSeqNo} */ private int seqNoToBitSetOffset(long seqNo) { assert Thread.holdsLock(this); - assert seqNo >= minSeqNoInProcessSeqNo; - return ((int) (seqNo - minSeqNoInProcessSeqNo)) % processedSeqNoChunkSize; + assert seqNo >= firstSeqNoInProcessSeqNo; + return ((int) (seqNo - firstSeqNoInProcessSeqNo)) % bitArraysSize; } } diff --git a/core/src/main/java/org/elasticsearch/index/seqno/SeqNoStats.java b/core/src/main/java/org/elasticsearch/index/seqno/SeqNoStats.java index faf93eb276607..99ffb6ad54794 100644 --- a/core/src/main/java/org/elasticsearch/index/seqno/SeqNoStats.java +++ b/core/src/main/java/org/elasticsearch/index/seqno/SeqNoStats.java @@ -29,8 +29,6 @@ public class 
SeqNoStats implements ToXContent, Writeable { - public static final SeqNoStats PROTOTYPE = new SeqNoStats(0, 0); - final long maxSeqNo; final long localCheckpoint; @@ -39,6 +37,10 @@ public SeqNoStats(long maxSeqNo, long localCheckpoint) { this.localCheckpoint = localCheckpoint; } + public SeqNoStats(StreamInput in) throws IOException { + this(in.readZLong(), in.readZLong()); + } + /** the maximum sequence number seen so far */ public long getMaxSeqNo() { return maxSeqNo; @@ -56,8 +58,8 @@ public SeqNoStats readFrom(StreamInput in) throws IOException { @Override public void writeTo(StreamOutput out) throws IOException { - out.writeLong(maxSeqNo); - out.writeLong(localCheckpoint); + out.writeZLong(maxSeqNo); + out.writeZLong(localCheckpoint); } @Override diff --git a/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointServiceTests.java b/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointServiceTests.java index 53c1de7dd5208..429c60af00d76 100644 --- a/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointServiceTests.java +++ b/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointServiceTests.java @@ -50,7 +50,7 @@ protected LocalCheckpointService getCheckpointService() { new ShardId("test", 0), IndexSettingsModule.newIndexSettings("test", Settings.builder() - .put(LocalCheckpointService.SETTINGS_BIT_ARRAY_CHUNK_SIZE, SMALL_CHUNK_SIZE) + .put(LocalCheckpointService.SETTINGS_BIT_ARRAYS_SIZE, SMALL_CHUNK_SIZE) .build() )); } diff --git a/test-framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test-framework/src/main/java/org/elasticsearch/test/ESTestCase.java index 53f69b42ec1ca..fb626d9963516 100644 --- a/test-framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test-framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -555,12 +555,7 @@ private static String groupName(ThreadGroup threadGroup) { * Returns size random values */ public static List randomSubsetOf(int size, T... 
values) { - if (size > values.length) { - throw new IllegalArgumentException("Can\'t pick " + size + " random objects from a list of " + values.length + " objects"); - } - List list = arrayAsArrayList(values); - Collections.shuffle(list); - return list.subList(0, size); + return randomSubsetOf(size, Arrays.asList(values)); } /** @@ -571,7 +566,7 @@ public static List randomSubsetOf(int size, Collection values) { throw new IllegalArgumentException("Can\'t pick " + size + " random objects from a list of " + values.size() + " objects"); } List list = new ArrayList<>(values); - Collections.shuffle(list); + Collections.shuffle(list, random()); return list.subList(0, size); } @@ -621,7 +616,7 @@ public void assertPathHasBeenCleared(Path path) throws Exception { sb.append("]"); assertThat(count + " files exist that should have been cleaned:\n" + sb.toString(), count, equalTo(0)); } - + /** Returns the suite failure marker: internal use only! */ public static TestRuleMarkFailure getSuiteFailureMarker() { return suiteFailureMarker;