Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
00a67b4
Huge commit - restore uses installshard - big update in locking
HoustonPutman Jul 24, 2025
2d5e12b
Implement callingLock mirroring for distributed API Manager locking
HoustonPutman Aug 1, 2025
37cea68
Merge remote-tracking branch 'apache/main' into locking-update
HoustonPutman Sep 2, 2025
293f35d
Merge remote-tracking branch 'apache/main' into locking-update
HoustonPutman Dec 2, 2025
242235a
Changelog
HoustonPutman Dec 2, 2025
72ae4bf
Merge remote-tracking branch 'apache/main' into solr-18011-locking-up…
HoustonPutman Jan 5, 2026
b0452f4
Support distributed locks better
HoustonPutman Jan 8, 2026
72401ca
Start switching over to AdminCmdContext
HoustonPutman Jan 8, 2026
6dac593
Refactor how locking is passed through collections api commands.
HoustonPutman Jan 9, 2026
e368c48
Make some bug fixes
HoustonPutman Jan 10, 2026
97352f7
Some test 'fixes', need to rethink v2 tests still
HoustonPutman Jan 10, 2026
6f2d61a
Add real tests for DeleteAlias and DeleteNode v2 apis
HoustonPutman Jan 12, 2026
74176a4
Tidy
HoustonPutman Jan 12, 2026
df0888c
Fix broken tests
HoustonPutman Jan 13, 2026
3627d29
Add missing test utility class
HoustonPutman Jan 13, 2026
4a49aa9
Move over remaining APIs and tests
HoustonPutman Jan 13, 2026
021303a
Merge remote-tracking branch 'apache/main' into solr-18011-locking-up…
HoustonPutman Jan 13, 2026
e61279c
Merge remote-tracking branch 'apache/main' into solr-18011-locking-up…
HoustonPutman Jan 17, 2026
4c9a766
Fix changelog entry
HoustonPutman Jan 17, 2026
c7cfe19
Remove files that shouldn't be changed.
HoustonPutman Jan 17, 2026
08326cc
One more that shouldn't be changed
HoustonPutman Jan 17, 2026
2513cd4
Fixes for passing callingLockIds around
HoustonPutman Jan 22, 2026
7dc8a98
Tidy
HoustonPutman Jan 28, 2026
b0f4520
Merge remote-tracking branch 'apache/main' into solr-18011-locking-up…
HoustonPutman Jan 29, 2026
dbb01cd
Deserialize callingLockIds in context class
HoustonPutman Jan 29, 2026
e87052e
Make some fixes for LockTree, add tests to validate
HoustonPutman Jan 29, 2026
5a2b445
Improve tests, fix errors in locking
HoustonPutman Jan 30, 2026
1e4767a
Add tests for Distributed locking. Make fixes
HoustonPutman Jan 30, 2026
b7939e0
Merge remote-tracking branch 'apache/main' into solr-18011-locking-up…
HoustonPutman Mar 24, 2026
1bf192c
Fix precommit issues
HoustonPutman Mar 24, 2026
e750951
Merge remote-tracking branch 'apache/main' into solr-18011-locking-up…
HoustonPutman Apr 14, 2026
a9b4cdb
Make sure that locking must be top-down, and cannot introduce dead-locks
HoustonPutman Apr 14, 2026
76ea4fd
Only pass relevant lockId, not all lockIds
HoustonPutman Apr 14, 2026
629d9c4
One missed refactored name
HoustonPutman Apr 14, 2026
2687168
Some fixes
HoustonPutman Apr 14, 2026
edf4a12
Improve changelog entry
HoustonPutman Apr 15, 2026
50f97a2
Fix error scenario for overseer, add test
HoustonPutman Apr 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions changelog/unreleased/solr-18011-locking-update.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
title: Allow locked Admin APIs to call other locked AdminAPIs. These locked Admin APIs can only call other APIs on the same resource tree (Collection > Shard > Replica) to protect against deadlocks.
type: changed # added, changed, fixed, deprecated, removed, dependency_update, security, other
authors:
- name: Houston Putman
nick: HoustonPutman
links:
- name: SOLR-18011
url: https://issues.apache.org/jira/browse/SOLR-18011
6 changes: 6 additions & 0 deletions solr/core/src/java/org/apache/solr/api/V2HttpCall.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.solr.api;

import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
import static org.apache.solr.common.params.CollectionAdminParams.CALLING_LOCK_ID_HEADER;
import static org.apache.solr.servlet.HttpSolrCall.Action.ADMIN;
import static org.apache.solr.servlet.HttpSolrCall.Action.ADMIN_OR_REMOTEPROXY;
import static org.apache.solr.servlet.HttpSolrCall.Action.PROCESS;
Expand Down Expand Up @@ -212,6 +213,11 @@ private void initAdminRequest(String path) throws Exception {
solrReq.getContext().put(CoreContainer.class.getName(), cores);
requestType = AuthorizationContext.RequestType.ADMIN;
action = ADMIN;

String callingLockId = req.getHeader(CALLING_LOCK_ID_HEADER);
if (callingLockId != null && !callingLockId.isBlank()) {
solrReq.getContext().put(CALLING_LOCK_ID_HEADER, callingLockId);
}
}

protected void parseRequest() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ public interface DistributedCollectionLockFactory {
* @param replicaName is ignored and can be {@code null} if {@code level} is {@link
* org.apache.solr.common.params.CollectionParams.LockLevel#COLLECTION} or {@link
* org.apache.solr.common.params.CollectionParams.LockLevel#SHARD}
* @param callingLockId the lockId from the caller that should be mirrored by this lock
* @return a lock instance that must be {@link DistributedLock#release()}'ed in a {@code finally},
* regardless of the lock having been acquired or not.
*/
Expand All @@ -62,5 +63,6 @@ DistributedLock createLock(
CollectionParams.LockLevel level,
String collName,
String shardId,
String replicaName);
String replicaName,
String callingLockId);
}
4 changes: 4 additions & 0 deletions solr/core/src/java/org/apache/solr/cloud/DistributedLock.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,8 @@ public interface DistributedLock {
void release();

boolean isAcquired();

String getLockId();

boolean isMirroringLock();
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
import com.google.common.annotations.VisibleForTesting;
import java.lang.invoke.MethodHandles;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.StrUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -46,7 +48,12 @@ public void waitUntilAcquired() {
for (DistributedLock lock : locks) {
log.debug("DistributedMultiLock.waitUntilAcquired. About to wait on lock {}", lock);
lock.waitUntilAcquired();
log.debug("DistributedMultiLock.waitUntilAcquired. Acquired lock {}", lock);
if (lock.isMirroringLock()) {
log.debug(
"DistributedMultiLock.waitUntilAcquired. Mirroring already-acquired lock {}", lock);
} else {
log.debug("DistributedMultiLock.waitUntilAcquired. Acquired lock {}", lock);
}
}
}

Expand All @@ -70,6 +77,17 @@ public boolean isAcquired() {
return true;
}

public String getLockId() {
return locks.stream().map(DistributedLock::getLockId).collect(Collectors.joining(","));
}

public static List<String> splitLockIds(String lockIds) {
if (StrUtils.isBlank(lockIds)) {
return List.of();
}
return List.of(lockIds.split(","));
}

@VisibleForTesting
public int getCountInternalLocks() {
return locks.size();
Expand Down
117 changes: 107 additions & 10 deletions solr/core/src/java/org/apache/solr/cloud/LockTree.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,11 @@
import java.util.ArrayDeque;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.UUID;
import org.apache.solr.cloud.OverseerMessageHandler.Lock;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CollectionParams.LockLevel;
import org.apache.solr.common.util.StrUtils;
Expand All @@ -38,20 +41,36 @@ public class LockTree {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private final Node root = new Node(null, LockLevel.CLUSTER, null);

public final Map<String, Lock> allLocks = new HashMap<>();

private class LockImpl implements Lock {
final Node node;
final String id;

LockImpl(Node node) {
this.node = node;
this.id = UUID.randomUUID().toString();
}

@Override
public void unlock() {
synchronized (LockTree.this) {
node.unlock(this);
if (node.unlock(this)) {
allLocks.remove(id);
}
}
}

@Override
public String id() {
return id;
}

@Override
public boolean validateSubpath(int lockLevel, List<String> path) {
return node.validateSubpath(lockLevel, path);
}

@Override
public String toString() {
return StrUtils.join(node.constructPath(new ArrayDeque<>()), '/');
Expand All @@ -71,12 +90,43 @@ public String toString() {
public class Session {
private SessionNode root = new SessionNode(LockLevel.CLUSTER);

public Lock lock(CollectionParams.CollectionAction action, List<String> path) {
public Lock lock(
CollectionParams.CollectionAction action, List<String> path, String callingLockId) {
if (action.lockLevel == LockLevel.NONE) return FREELOCK;
Node startingNode = LockTree.this.root;
SessionNode startingSession = root;

// If a callingLockId was passed in, validate it with the current lock path, and only start
// locking below the calling lock
Lock callingLock = StrUtils.isBlank(callingLockId) ? null : allLocks.get(callingLockId);
log.debug("Calling lock id: {}, level: {}", callingLockId, callingLock);
boolean reuseCurrentLock = false;
if (callingLock != null) {
if (callingLock.validateSubpath(action.lockLevel.getHeight(), path)) {
startingNode = ((LockImpl) callingLock).node;
startingSession = startingSession.find(startingNode.level.getHeight(), path);
if (startingSession == null) {
startingSession = root;
}
reuseCurrentLock = true;
} else {
throw new SolrException(
SolrException.ErrorCode.SERVER_ERROR,
String.format(
Locale.ROOT,
"Cannot lock an action under a different path than the calling action. Calling action lock path: %s, Requested action lock path: %s",
callingLock,
String.join("/", path)));
}
}
synchronized (LockTree.this) {
if (root.isBusy(action.lockLevel, path)) return null;
Lock lockObject = LockTree.this.root.lock(action.lockLevel, path);
if (lockObject == null) root.markBusy(action.lockLevel, path);
if (startingSession.isBusy(action.lockLevel, path)) return null;
Lock lockObject = startingNode.lock(action.lockLevel, path, reuseCurrentLock);
if (lockObject == null) {
startingSession.markBusy(action.lockLevel, path);
} else {
allLocks.put(lockObject.id(), lockObject);
}
return lockObject;
}
}
Expand Down Expand Up @@ -125,6 +175,18 @@ boolean isBusy(LockLevel lockLevel, List<String> path) {
return false;
}
}

SessionNode find(int lockLevel, List<String> path) {
if (level.getHeight() == lockLevel) {
return this;
} else if (level.getHeight() < lockLevel
&& kids != null
&& kids.containsKey(path.get(level.getHeight()))) {
return kids.get(path.get(level.getHeight())).find(lockLevel, path);
} else {
return null;
}
}
}

public Session getSession() {
Expand All @@ -135,6 +197,7 @@ private class Node {
final String name;
final Node mom;
final LockLevel level;
int refCount = 0;
HashMap<String, Node> children = new HashMap<>();
LockImpl myLock;

Expand All @@ -151,36 +214,70 @@ boolean isLocked() {
return false;
}

void unlock(LockImpl lockObject) {
boolean unlock(LockImpl lockObject) {
if (--refCount > 0) {
return false;
}
if (myLock == lockObject) myLock = null;
else {
log.info("Unlocked multiple times : {}", lockObject);
}
return true;
}

Lock lock(LockLevel lockLevel, List<String> path) {
if (myLock != null) return null; // I'm already locked. no need to go any further
Lock lock(LockLevel lockLevel, List<String> path, boolean reuseCurrentLock) {
if (myLock != null && !reuseCurrentLock) {
// I'm already locked. no need to go any further
return null;
}
if (lockLevel == level) {
// lock is supposed to be acquired at this level
if (myLock != null && reuseCurrentLock) {
// I am already locked, and I want to be re-used
refCount++;
return myLock;
}
// If I am locked or any of my children or grandchildren are locked
// it is not possible to acquire a lock
if (isLocked()) return null;
refCount++;
return myLock = new LockImpl(this);
} else {
String childName = path.get(level.getHeight());
Node child = children.get(childName);
if (child == null)
children.put(childName, child = new Node(childName, level.getChild(), this));
return child.lock(lockLevel, path);
return child.lock(lockLevel, path, false);
}
}

boolean validateSubpath(int lockLevel, List<String> path) {
return level.getHeight() <= lockLevel
&& (level.getHeight() == 0 || name.equals(path.get(level.getHeight() - 1)))
&& (mom == null || mom.validateSubpath(lockLevel, path));
}

ArrayDeque<String> constructPath(ArrayDeque<String> collect) {
if (name != null) collect.addFirst(name);
if (mom != null) mom.constructPath(collect);
return collect;
}
}

static final Lock FREELOCK = () -> {};
static final String FREELOCK_ID = "-1";
static final Lock FREELOCK =
new Lock() {
@Override
public void unlock() {}

@Override
public String id() {
return FREELOCK_ID;
}

@Override
public boolean validateSubpath(int lockLevel, List<String> path) {
return false;
}
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import java.lang.invoke.MethodHandles;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
Expand Down Expand Up @@ -61,7 +62,7 @@ public OverseerConfigSetMessageHandler(ZkStateReader zkStateReader, CoreContaine
}

@Override
public OverseerSolrResponse processMessage(ZkNodeProps message, String operation) {
public OverseerSolrResponse processMessage(ZkNodeProps message, String operation, Lock lock) {
NamedList<Object> results = new NamedList<>();
try {
if (!operation.startsWith(CONFIGSETS_ACTION_PREFIX)) {
Expand Down Expand Up @@ -113,11 +114,26 @@ public String getTimerName(String operation) {
}

@Override
public Lock lockTask(ZkNodeProps message, long ignored) {
public Lock lockTask(ZkNodeProps message, long ignored, String callingLockId) {
String configSetName = getTaskKey(message);
if (canExecute(configSetName, message)) {
markExclusiveTask(configSetName, message);
return () -> unmarkExclusiveTask(configSetName, message);
return new Lock() {
@Override
public void unlock() {
unmarkExclusiveTask(configSetName, message);
}

@Override
public String id() {
return configSetName;
}

@Override
public boolean validateSubpath(int lockLevel, List<String> path) {
return false;
}
};
}
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.solr.cloud;

import java.util.List;
import org.apache.solr.common.cloud.ZkNodeProps;

/** Interface for processing messages received by an {@link OverseerTaskProcessor} */
Expand All @@ -26,7 +27,7 @@ public interface OverseerMessageHandler {
* @param operation the operation to process
* @return response
*/
OverseerSolrResponse processMessage(ZkNodeProps message, String operation);
OverseerSolrResponse processMessage(ZkNodeProps message, String operation, Lock lock);

/**
* @return the name of the OverseerMessageHandler
Expand All @@ -41,14 +42,18 @@ public interface OverseerMessageHandler {

interface Lock {
void unlock();

String id();

boolean validateSubpath(int lockLevel, List<String> path);
}

/**
* Grabs an exclusive lock for this particular task.
*
* @return <code>null</code> if locking is not possible.
*/
Lock lockTask(ZkNodeProps message, long batchSessionId);
Lock lockTask(ZkNodeProps message, long batchSessionId, String callingLockId);

/**
* @param message the message being processed
Expand Down
Loading
Loading