11 – Resilience (Circuit Breaker & Retry)
Version : 4.0.0 Date : 2025-12-09
1. Introduction
Le Socle V4 intègre des patterns de résilience pour gérer les défaillances des systèmes externes :
- Retry : Réessayer les opérations échouées
- Circuit Breaker : Protéger contre les cascades de défaillances
2. Retry Pattern
2.1 Configuration
# Retry settings. Each ${NAME:default} placeholder uses the value after the colon
# when NAME is not provided.
socle:
  resilience:
    retry:
      max-attempts: ${RETRY_MAX_ATTEMPTS:3}
      initial-delay-ms: ${RETRY_INITIAL_DELAY_MS:1000}
      max-delay-ms: ${RETRY_MAX_DELAY_MS:30000}
      multiplier: ${RETRY_MULTIPLIER:2.0}
2.2 Interface RetryTemplate
package eu.lmvi.socle.resilience;
/**
 * Contract for running an operation that is attempted again when it fails.
 */
public interface RetryTemplate {

    /**
     * Runs an operation with retry and hands back its result.
     *
     * @param operation the work to run
     * @param <T>       type of the value produced by the operation
     * @return the value produced by {@code operation}
     * @throws RetryExhaustedException declared for the case where retrying did not produce a result
     */
    <T> T execute(Supplier<T> operation) throws RetryExhaustedException;

    /**
     * Runs an operation with retry, and uses a fallback to obtain the result instead of
     * reporting the failure to the caller.
     *
     * @param operation the work to run
     * @param fallback  supplier of the substitute result
     * @param <T>       type of the value produced
     * @return the value from {@code operation}, or the one from {@code fallback}
     */
    <T> T executeWithFallback(Supplier<T> operation, Supplier<T> fallback);

    /**
     * Runs an operation that returns nothing, with retry.
     *
     * @param operation the work to run
     * @throws RetryExhaustedException declared for the case where retrying did not succeed
     */
    void executeVoid(Runnable operation) throws RetryExhaustedException;
}
2.3 Implémentation
package eu.lmvi.socle.resilience;
/**
 * {@link RetryTemplate} that pauses between attempts and multiplies the pause after
 * every failed attempt, up to a configured ceiling.
 *
 * <p>Configuration values are sanitised at construction time: at least one attempt is
 * always made, and negative delays are treated as zero so that a bad setting cannot make
 * {@link Thread#sleep(long)} throw and hide the real failure.
 */
@Component
public class ExponentialBackoffRetryTemplate implements RetryTemplate {

    private static final Logger log = LoggerFactory.getLogger(ExponentialBackoffRetryTemplate.class);

    private final int maxAttempts;
    private final long initialDelayMs;
    private final long maxDelayMs;
    private final double multiplier;

    /**
     * Reads the retry settings from the resilience section of the configuration.
     *
     * @param config source of the max-attempts, initial-delay, max-delay and multiplier values
     */
    public ExponentialBackoffRetryTemplate(SocleConfiguration config) {
        // With zero or negative attempts the loop would never call the operation at all.
        this.maxAttempts = Math.max(1, config.getResilience().getRetry().getMaxAttempts());
        this.initialDelayMs = Math.max(0L, config.getResilience().getRetry().getInitialDelayMs());
        this.maxDelayMs = Math.max(0L, config.getResilience().getRetry().getMaxDelayMs());
        this.multiplier = config.getResilience().getRetry().getMultiplier();
    }

    /**
     * Calls {@code operation} until it returns without throwing an {@link Exception} or the
     * configured number of attempts is used up, sleeping between attempts.
     *
     * @param operation the work to run; must not be null
     * @param <T>       type of the result
     * @return the first successful result
     * @throws RetryExhaustedException when every attempt failed; the cause is the last failure
     * @throws NullPointerException    if {@code operation} is null
     * @throws RuntimeException        with message "Retry interrupted" if the thread is
     *                                 interrupted while waiting between attempts
     */
    @Override
    public <T> T execute(Supplier<T> operation) throws RetryExhaustedException {
        if (operation == null) {
            // Fail fast: otherwise the NullPointerException would be retried with full backoff.
            throw new NullPointerException("operation");
        }
        Exception lastException = null;
        long delay = initialDelayMs;
        for (int attempt = 1; attempt <= maxAttempts; attempt++) {
            try {
                return operation.get();
            } catch (Exception e) {
                lastException = e;
                log.warn("Attempt {}/{} failed: {}", attempt, maxAttempts, e.getMessage());
                // No pause after the final attempt: the caller is about to get the failure anyway.
                if (attempt < maxAttempts) {
                    sleep(delay);
                    delay = nextDelay(delay);
                }
            }
        }
        throw new RetryExhaustedException(
                "Operation failed after " + maxAttempts + " attempts",
                lastException
        );
    }

    /**
     * Same as {@link #execute(Supplier)}, but returns the fallback value once all attempts
     * have failed instead of throwing.
     *
     * @param operation the work to run; must not be null
     * @param fallback  supplier of the substitute result; must not be null
     * @param <T>       type of the result
     * @return the operation's result, or the fallback's result after exhaustion
     * @throws NullPointerException if {@code operation} or {@code fallback} is null
     */
    @Override
    public <T> T executeWithFallback(Supplier<T> operation, Supplier<T> fallback) {
        if (fallback == null) {
            throw new NullPointerException("fallback");
        }
        try {
            return execute(operation);
        } catch (RetryExhaustedException e) {
            log.warn("All retries exhausted, using fallback");
            return fallback.get();
        }
    }

    /**
     * Retries an operation that produces no value.
     *
     * @param operation the work to run; must not be null
     * @throws RetryExhaustedException when every attempt failed
     * @throws NullPointerException    if {@code operation} is null
     */
    @Override
    public void executeVoid(Runnable operation) throws RetryExhaustedException {
        if (operation == null) {
            throw new NullPointerException("operation");
        }
        execute(() -> {
            operation.run();
            return null;
        });
    }

    /** Computes the pause that follows {@code current}: multiplied, capped, never negative. */
    private long nextDelay(long current) {
        long grown = (long) (current * multiplier);
        return Math.max(0L, Math.min(grown, maxDelayMs));
    }

    /** Sleeps for {@code ms}; on interruption restores the interrupt flag and aborts the retry. */
    private void sleep(long ms) {
        try {
            Thread.sleep(ms);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new RuntimeException("Retry interrupted", e);
        }
    }
}
2.4 Utilisation
/**
 * Usage example: wraps HTTP lookups in the injected {@link RetryTemplate}.
 */
@Service
public class ExternalApiService {

    @Autowired
    private RetryTemplate retryTemplate;

    /**
     * Fetches the data for {@code id}, retrying the HTTP call through the template.
     *
     * @param id identifier appended to the {@code /data/} path
     * @return the value returned by the HTTP client
     */
    public Data fetchData(String id) {
        String path = "/data/" + id;
        // The call below is the part that may fail
        return retryTemplate.execute(() -> httpClient.get(path));
    }

    /**
     * Same lookup as {@link #fetchData(String)}, with the cache as fallback.
     *
     * @param id identifier appended to the {@code /data/} path
     * @return the value from the HTTP client, or the one from {@code getCachedData(id)}
     */
    public Data fetchDataWithFallback(String id) {
        String path = "/data/" + id;
        return retryTemplate.executeWithFallback(
                () -> {
                    return httpClient.get(path);
                },
                () -> {
                    // Fallback to the cache
                    return getCachedData(id);
                }
        );
    }
}
3. Circuit Breaker Pattern
3.1 États
                échecs ≥ threshold                    timeout
┌──────────┐ ───────────────────────► ┌────────┐ ────────────────► ┌───────────┐
│  CLOSED  │                          │  OPEN  │                   │ HALF_OPEN │
└──────────┘                          └────────┘ ◄──────────────── └───────────┘
     ▲                                                 échec             │
     └───────────────────────────────────────────────────────────────────┘
                                 retour succès
- CLOSED : Fonctionnement normal, les requêtes passent
- OPEN : Circuit ouvert, les requêtes échouent immédiatement
- HALF_OPEN : Test de reprise, quelques requêtes passent
3.2 Configuration
# Circuit-breaker settings. Each ${NAME:default} placeholder uses the value after the
# colon when NAME is not provided.
socle:
  resilience:
    circuit-breaker:
      failure-threshold: ${CB_FAILURE_THRESHOLD:5}
      success-threshold: ${CB_SUCCESS_THRESHOLD:3}
      timeout-ms: ${CB_TIMEOUT_MS:60000}
      half-open-requests: ${CB_HALF_OPEN_REQUESTS:3}
3.3 Interface CircuitBreaker
package eu.lmvi.socle.resilience;
/**
 * Contract of a named circuit breaker that guards an operation.
 */
public interface CircuitBreaker {

    /**
     * @return the name of this circuit
     */
    String getName();

    /**
     * @return the current state of this circuit
     */
    CircuitState getState();

    /**
     * Runs an operation under the protection of this circuit.
     *
     * @param operation the work to run
     * @param <T>       type of the result
     * @return the value produced by {@code operation}
     * @throws CircuitBreakerOpenException declared for calls refused by the circuit
     */
    <T> T execute(Supplier<T> operation) throws CircuitBreakerOpenException;

    /**
     * Runs an operation under the protection of this circuit, with a fallback.
     *
     * @param operation the work to run
     * @param fallback  supplier of the substitute result
     * @param <T>       type of the result
     * @return the value from {@code operation}, or the one from {@code fallback}
     */
    <T> T executeWithFallback(Supplier<T> operation, Supplier<T> fallback);

    /**
     * Forces the circuit open.
     */
    void forceOpen();

    /**
     * Forces the circuit closed.
     */
    void forceClose();

    /**
     * Resets the counters.
     */
    void reset();

    /**
     * @return the metrics of this circuit
     */
    CircuitBreakerMetrics getMetrics();
}
/**
 * The three states a circuit breaker can be in.
 */
public enum CircuitState {

    /** Normal operation: requests go through. */
    CLOSED,

    /** Circuit open: requests fail immediately. */
    OPEN,

    /** Recovery test: a few requests are let through. */
    HALF_OPEN
}
3.4 Implémentation
package eu.lmvi.socle.resilience;
/**
 * {@link CircuitBreaker} with the three states CLOSED, OPEN and HALF_OPEN.
 *
 * <ul>
 *   <li>CLOSED: calls pass; reaching {@code failureThreshold} failures without an
 *       intervening success opens the circuit.</li>
 *   <li>OPEN: calls are refused until {@code timeoutMs} has elapsed since the circuit
 *       opened; the next call after that moves the circuit to HALF_OPEN.</li>
 *   <li>HALF_OPEN: at most {@code halfOpenRequests} probe calls are admitted;
 *       {@code successThreshold} successes close the circuit, any failure reopens it.</li>
 * </ul>
 *
 * <p>Every state change happens while holding {@code lock}. The timeout window is measured
 * from the moment the circuit entered OPEN, including when it was opened through
 * {@link #forceOpen()}, so a forced opening also ends after {@code timeoutMs}.
 *
 * <p>NOTE(review): the {@code CircuitBreaker} interface also declares {@code getMetrics()};
 * no implementation appears in this excerpt, so none is provided here. Confirm where it lives.
 */
public class DefaultCircuitBreaker implements CircuitBreaker {

    private static final Logger log = LoggerFactory.getLogger(DefaultCircuitBreaker.class);

    private final String name;
    private final int failureThreshold;
    private final int successThreshold;
    private final long timeoutMs;
    private final int halfOpenRequests;

    private volatile CircuitState state = CircuitState.CLOSED;
    private final AtomicInteger failureCount = new AtomicInteger(0);
    private final AtomicInteger successCount = new AtomicInteger(0);
    private final AtomicInteger halfOpenCount = new AtomicInteger(0);
    private volatile Instant lastFailureTime;
    /** Moment the circuit last entered OPEN; null whenever the circuit is not OPEN. */
    private volatile Instant openedAt;
    private final ReentrantLock lock = new ReentrantLock();

    /**
     * Creates a closed circuit.
     *
     * @param name             name of the circuit; must not be null
     * @param failureThreshold failures in CLOSED that open the circuit
     * @param successThreshold successes in HALF_OPEN that close the circuit
     * @param timeoutMs        milliseconds the circuit stays OPEN before probing
     * @param halfOpenRequests probe calls admitted in HALF_OPEN; raised to at least
     *                         {@code successThreshold} (and at least 1), because with fewer
     *                         probes than required successes the circuit could never close
     * @throws IllegalArgumentException if {@code name} is null
     */
    public DefaultCircuitBreaker(String name, int failureThreshold, int successThreshold,
                                 long timeoutMs, int halfOpenRequests) {
        if (name == null) {
            throw new IllegalArgumentException("name must not be null");
        }
        this.name = name;
        this.failureThreshold = failureThreshold;
        this.successThreshold = successThreshold;
        this.timeoutMs = timeoutMs;
        this.halfOpenRequests = Math.max(1, Math.max(halfOpenRequests, successThreshold));
    }

    /**
     * Runs {@code operation} if the circuit admits the call and records the outcome.
     *
     * @param operation the work to run; must not be null
     * @param <T>       type of the result
     * @return the value produced by {@code operation}
     * @throws CircuitBreakerOpenException if the call is refused
     * @throws NullPointerException        if {@code operation} is null
     */
    @Override
    public <T> T execute(Supplier<T> operation) throws CircuitBreakerOpenException {
        if (operation == null) {
            // Checked before admission so a programming error is not counted as a circuit failure.
            throw new NullPointerException("operation");
        }
        if (!allowRequest()) {
            throw new CircuitBreakerOpenException(name);
        }
        try {
            T result = operation.get();
            onSuccess();
            return result;
        } catch (Exception e) {
            onFailure();
            throw e;
        }
    }

    /**
     * Runs {@code operation} through {@link #execute(Supplier)} and returns the fallback's
     * value when the call is refused or the operation throws an {@link Exception}.
     *
     * @param operation the work to run
     * @param fallback  supplier of the substitute result
     * @param <T>       type of the result
     * @return the operation's result, or the fallback's result
     */
    @Override
    public <T> T executeWithFallback(Supplier<T> operation, Supplier<T> fallback) {
        try {
            return execute(operation);
        } catch (CircuitBreakerOpenException e) {
            log.debug("[{}] Circuit open, using fallback", name);
            return fallback.get();
        } catch (Exception e) {
            log.warn("[{}] Operation failed, using fallback", name, e);
            return fallback.get();
        }
    }

    /** Decides whether a call may proceed, moving OPEN to HALF_OPEN once the timeout has elapsed. */
    private boolean allowRequest() {
        if (state == CircuitState.CLOSED) {
            // Common case: no lock needed for a single volatile read.
            return true;
        }
        lock.lock();
        try {
            switch (state) {
                case CLOSED:
                    return true;
                case OPEN:
                    // Check whether the timeout has passed
                    if (openedAt != null &&
                            Duration.between(openedAt, Instant.now()).toMillis() > timeoutMs) {
                        transitionTo(CircuitState.HALF_OPEN);
                        // The call that triggers the transition is itself the first probe.
                        return halfOpenCount.incrementAndGet() <= halfOpenRequests;
                    }
                    return false;
                case HALF_OPEN:
                    // Limit the number of calls admitted while half-open
                    return halfOpenCount.incrementAndGet() <= halfOpenRequests;
                default:
                    return false;
            }
        } finally {
            lock.unlock();
        }
    }

    /** Records a successful call. */
    private void onSuccess() {
        lock.lock();
        try {
            switch (state) {
                case CLOSED:
                    failureCount.set(0);
                    break;
                case HALF_OPEN:
                    if (successCount.incrementAndGet() >= successThreshold) {
                        transitionTo(CircuitState.CLOSED);
                    }
                    break;
                default:
                    // OPEN: result of a call admitted before the circuit opened; nothing to count.
                    break;
            }
        } finally {
            lock.unlock();
        }
    }

    /** Records a failed call. */
    private void onFailure() {
        lock.lock();
        try {
            lastFailureTime = Instant.now();
            switch (state) {
                case CLOSED:
                    if (failureCount.incrementAndGet() >= failureThreshold) {
                        transitionTo(CircuitState.OPEN);
                    }
                    break;
                case HALF_OPEN:
                    transitionTo(CircuitState.OPEN);
                    break;
                default:
                    // OPEN: late failure of a call admitted earlier; the open window is not extended.
                    break;
            }
        } finally {
            lock.unlock();
        }
    }

    /**
     * Switches to {@code newState} and clears all counters. Callers must hold {@code lock}.
     * Does nothing when the circuit is already in {@code newState}.
     */
    private void transitionTo(CircuitState newState) {
        if (state != newState) {
            log.info("[{}] Circuit state: {} -> {}", name, state, newState);
            state = newState;
            openedAt = (newState == CircuitState.OPEN) ? Instant.now() : null;
            failureCount.set(0);
            successCount.set(0);
            halfOpenCount.set(0);
        }
    }

    /** {@inheritDoc} */
    @Override
    public CircuitState getState() {
        return state;
    }

    /** {@inheritDoc} */
    @Override
    public String getName() {
        return name;
    }

    /** Opens the circuit now; it starts probing again after {@code timeoutMs}. */
    @Override
    public void forceOpen() {
        lock.lock();
        try {
            transitionTo(CircuitState.OPEN);
        } finally {
            lock.unlock();
        }
    }

    /** Closes the circuit now and clears the counters. */
    @Override
    public void forceClose() {
        lock.lock();
        try {
            transitionTo(CircuitState.CLOSED);
        } finally {
            lock.unlock();
        }
    }

    /** Returns the circuit to CLOSED and clears counters and timestamps, without logging a transition. */
    @Override
    public void reset() {
        lock.lock();
        try {
            state = CircuitState.CLOSED;
            failureCount.set(0);
            successCount.set(0);
            halfOpenCount.set(0);
            lastFailureTime = null;
            openedAt = null;
        } finally {
            lock.unlock();
        }
    }
}
3.5 CircuitBreakerRegistry
/**
 * Holds one {@link CircuitBreaker} per name, creating it on first request from the
 * circuit-breaker section of the configuration.
 */
@Component
public class CircuitBreakerRegistry {

    private final ConcurrentHashMap<String, CircuitBreaker> circuits = new ConcurrentHashMap<>();
    private final SocleConfiguration config;

    /**
     * @param config configuration read each time a new circuit is created
     */
    public CircuitBreakerRegistry(SocleConfiguration config) {
        // Without this constructor the final field is never assigned.
        this.config = config;
    }

    /**
     * Returns the circuit registered under {@code name}, creating it if none exists yet.
     *
     * @param name name of the circuit
     * @return the existing or newly created circuit
     */
    public CircuitBreaker getOrCreate(String name) {
        return circuits.computeIfAbsent(name, this::createCircuitBreaker);
    }

    /**
     * Looks up a circuit without creating it.
     *
     * @param name name of the circuit
     * @return the circuit, or {@code null} when no circuit has that name
     */
    public CircuitBreaker get(String name) {
        return circuits.get(name);
    }

    /**
     * @return a new map from circuit name to the state each circuit reported when visited
     */
    public Map<String, CircuitState> getAllStates() {
        return circuits.entrySet().stream()
                .collect(Collectors.toMap(
                        Map.Entry::getKey,
                        e -> e.getValue().getState()
                ));
    }

    /** Builds a {@link DefaultCircuitBreaker} from the configured thresholds. */
    private CircuitBreaker createCircuitBreaker(String name) {
        return new DefaultCircuitBreaker(
                name,
                config.getResilience().getCircuitBreaker().getFailureThreshold(),
                config.getResilience().getCircuitBreaker().getSuccessThreshold(),
                config.getResilience().getCircuitBreaker().getTimeoutMs(),
                config.getResilience().getCircuitBreaker().getHalfOpenRequests()
        );
    }
}
3.6 Utilisation
/**
 * Usage example: routes payment processing through the "payment-gateway" circuit.
 */
@Service
public class PaymentService {

    @Autowired
    private CircuitBreakerRegistry cbRegistry;

    /**
     * Processes a payment through the gateway; when the circuit's fallback is used, the
     * payment is enqueued and a pending result is returned.
     *
     * @param payment the payment to process
     * @return the gateway's result, or a pending result from the fallback
     */
    public PaymentResult processPayment(Payment payment) {
        return cbRegistry.getOrCreate("payment-gateway").executeWithFallback(
                () -> paymentGateway.process(payment),
                () -> queueForLater(payment)
        );
    }

    /** Fallback: puts the payment on the queue for later processing. */
    private PaymentResult queueForLater(Payment payment) {
        paymentQueue.enqueue(payment);
        return PaymentResult.pending("Queued for later processing");
    }
}
4. Combinaison Retry + Circuit Breaker
/**
 * Usage example: a retry nested inside a circuit breaker, with the cache as fallback.
 */
@Service
public class ResilientApiClient {

    @Autowired
    private RetryTemplate retryTemplate;

    @Autowired
    private CircuitBreakerRegistry cbRegistry;

    /**
     * Fetches {@code endpoint} through the circuit named {@code "api-" + endpoint}.
     * The retried HTTP call is what the circuit observes.
     *
     * @param endpoint endpoint passed to the HTTP client and used in the circuit name
     * @return the HTTP result, or the value of {@code getCachedData(endpoint)} as fallback
     */
    public Data fetchData(String endpoint) {
        String circuitName = "api-" + endpoint;
        return cbRegistry.getOrCreate(circuitName).executeWithFallback(
                () -> {
                    return retryTemplate.execute(() -> httpClient.get(endpoint));
                },
                () -> {
                    return getCachedData(endpoint);
                }
        );
    }
}
Ordre d’exécution
1. CircuitBreaker vérifie si le circuit est ouvert
→ Si OPEN: fallback immédiat
→ Si CLOSED/HALF_OPEN: continue
2. RetryTemplate essaie l'opération
→ Retry avec backoff exponentiel
→ Si tous les retries échouent: exception
3. CircuitBreaker compte l'échec
→ Si seuil atteint: passe en OPEN
4. Fallback si échec
5. Annotations (optionnel)
/**
 * Marks a method whose invocation should be wrapped with resilience handling.
 * Retained at runtime and applicable to methods only.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.METHOD)
public @interface Resilient {

    /** Name of the circuit breaker to use; empty by default. */
    String circuitBreaker() default "";

    /** Retry count; 3 by default. */
    int maxRetries() default 3;

    /** Retry delay; 1000 by default (presumably milliseconds; confirm with the consumer). */
    long retryDelay() default 1000L;

    /** Throwable types to retry on; {@link Exception} by default. */
    Class<? extends Throwable>[] retryOn() default {Exception.class};
}
/**
 * Around-advice for methods annotated with {@link Resilient}: the invocation runs through
 * the retry template, nested inside a circuit breaker.
 *
 * <p>NOTE(review): only {@code circuitBreaker()} is read from the annotation here.
 * {@code maxRetries()}, {@code retryDelay()} and {@code retryOn()} are not consulted, so
 * retry behaviour comes entirely from the injected {@link RetryTemplate}.
 */
@Aspect
@Component
public class ResilienceAspect {

    private final CircuitBreakerRegistry cbRegistry;
    private final RetryTemplate retryTemplate;

    /**
     * @param cbRegistry    registry used to obtain the circuit for each advised method
     * @param retryTemplate template used to retry the advised invocation
     */
    public ResilienceAspect(CircuitBreakerRegistry cbRegistry, RetryTemplate retryTemplate) {
        this.cbRegistry = cbRegistry;
        this.retryTemplate = retryTemplate;
    }

    /**
     * Wraps the advised call in retry and circuit-breaker handling.
     *
     * @param pjp       the intercepted invocation
     * @param resilient the annotation found on the method
     * @return whatever the advised method returns
     * @throws Throwable as required by the around-advice signature
     */
    @Around("@annotation(resilient)")
    public Object handleResilience(ProceedingJoinPoint pjp, Resilient resilient) throws Throwable {
        String cbName = resilient.circuitBreaker();
        if (cbName.isEmpty()) {
            // No explicit name: use one circuit per advised method.
            cbName = pjp.getSignature().getDeclaringTypeName() + "." + pjp.getSignature().getName();
        }
        CircuitBreaker cb = cbRegistry.getOrCreate(cbName);
        return cb.execute(() -> retryTemplate.execute(() -> proceedUnchecked(pjp)));
    }

    /**
     * Proceeds with the invocation inside a {@code Supplier}, which cannot throw checked
     * exceptions. Unchecked exceptions and errors pass through untouched; only checked
     * throwables are wrapped in a {@link RuntimeException}.
     */
    private static Object proceedUnchecked(ProceedingJoinPoint pjp) {
        try {
            return pjp.proceed();
        } catch (RuntimeException | Error e) {
            throw e;
        } catch (Throwable t) {
            throw new RuntimeException(t);
        }
    }
}
Utilisation
/**
 * Usage example for the {@link Resilient} annotation.
 */
@Service
public class UserService {

    /**
     * Fetches a user through the API client; annotated to use the "user-api" circuit.
     *
     * @param id identifier of the user
     * @return the user returned by the API client
     */
    @Resilient(maxRetries = 5, circuitBreaker = "user-api")
    public User getUser(String id) {
        User fetched = userApiClient.fetchUser(id);
        return fetched;
    }
}
6. Bulkhead Pattern
Le pattern Bulkhead limite le nombre d’appels concurrents pour éviter l’épuisement des ressources.
/**
 * Limits the number of concurrent executions per named bulkhead using one
 * {@link Semaphore} per name.
 */
@Component
public class BulkheadRegistry {

    private final ConcurrentHashMap<String, Semaphore> bulkheads = new ConcurrentHashMap<>();

    /**
     * Runs {@code operation} if a permit is immediately available for {@code name}, and
     * releases the permit when the operation finishes, normally or with an exception.
     *
     * <p>The semaphore for a name is created on first use with {@code maxConcurrent} permits;
     * later calls with a different {@code maxConcurrent} for the same name reuse the
     * existing semaphore and do not change its size.
     *
     * @param name          name of the bulkhead
     * @param maxConcurrent permits to create the bulkhead with; must be positive
     * @param operation     the work to run
     * @param <T>           type of the result
     * @return the value produced by {@code operation}
     * @throws BulkheadFullException    if no permit is available
     * @throws IllegalArgumentException if {@code maxConcurrent} is not positive
     */
    public <T> T execute(String name, int maxConcurrent, Supplier<T> operation)
            throws BulkheadFullException {
        if (maxConcurrent <= 0) {
            // A semaphore without permits would be cached and reject every later call for this name.
            throw new IllegalArgumentException("maxConcurrent must be positive: " + maxConcurrent);
        }
        Semaphore semaphore = bulkheads.computeIfAbsent(name,
                k -> new Semaphore(maxConcurrent));
        if (!semaphore.tryAcquire()) {
            throw new BulkheadFullException(name);
        }
        try {
            return operation.get();
        } finally {
            semaphore.release();
        }
    }
}
// Utilisation
/**
 * Usage example: calls an external API through the "external-api" bulkhead.
 */
@Service
public class ApiService {

    @Autowired
    private BulkheadRegistry bulkheadRegistry;

    /**
     * Fetches {@code /api/data} through the bulkhead.
     *
     * @return the value returned by the HTTP client
     */
    public Data callExternalApi() {
        // At most 10 concurrent calls
        return bulkheadRegistry.execute("external-api", 10, () -> httpClient.get("/api/data"));
    }
}
7. Timeout Pattern
@Component
public class TimeoutTemplate {
private final ScheduledExecutorService scheduler =
Executors.newScheduledThreadPool(4);
public <T> T executeWithTimeout(Supplier<T> operation, Duration timeout)
throws TimeoutException {
CompletableFuture<T> future = CompletableFuture.supplyAsync(operation);
try {
return future.get(timeout.toMillis(), TimeUnit.MILLISECONDS);
} catch (java.util.concurrent.TimeoutException e) {
future.cancel(true);
throw new TimeoutException("Operation timed out after " + timeout);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}
8. API Admin
/**
 * Admin endpoints under {@code /admin/resilience} for inspecting and driving circuits.
 * The three POST endpoints do nothing when no circuit has the given name.
 */
@RestController
@RequestMapping("/admin/resilience")
public class ResilienceController {

    @Autowired
    private CircuitBreakerRegistry cbRegistry;

    /**
     * @return the state of every registered circuit, keyed by circuit name
     */
    @GetMapping("/circuits")
    public Map<String, CircuitState> getCircuits() {
        Map<String, CircuitState> states = cbRegistry.getAllStates();
        return states;
    }

    /**
     * Resets the named circuit.
     *
     * @param name name of the circuit
     */
    @PostMapping("/circuits/{name}/reset")
    public void resetCircuit(@PathVariable String name) {
        CircuitBreaker target = cbRegistry.get(name);
        if (target == null) {
            return;
        }
        target.reset();
    }

    /**
     * Forces the named circuit open.
     *
     * @param name name of the circuit
     */
    @PostMapping("/circuits/{name}/open")
    public void openCircuit(@PathVariable String name) {
        CircuitBreaker target = cbRegistry.get(name);
        if (target == null) {
            return;
        }
        target.forceOpen();
    }

    /**
     * Forces the named circuit closed.
     *
     * @param name name of the circuit
     */
    @PostMapping("/circuits/{name}/close")
    public void closeCircuit(@PathVariable String name) {
        CircuitBreaker target = cbRegistry.get(name);
        if (target == null) {
            return;
        }
        target.forceClose();
    }
}
9. Métriques
/**
 * Publishes retry and circuit-breaker measurements to a {@link MeterRegistry}.
 */
@Component
public class ResilienceMetrics {

    private final MeterRegistry registry;

    /** Latest state code per circuit name; each holder backs the gauge registered for that name. */
    private final ConcurrentHashMap<String, AtomicInteger> circuitStates = new ConcurrentHashMap<>();

    /**
     * @param registry registry that receives the counters and gauges
     */
    public ResilienceMetrics(MeterRegistry registry) {
        // Without this constructor the final field is never assigned.
        this.registry = registry;
    }

    /**
     * Increments the {@code socle_retry_attempts} counter tagged with the operation, the
     * attempt number and the outcome.
     *
     * @param operation value of the {@code operation} tag
     * @param attempt   attempt number, used as the {@code attempt} tag
     * @param success   outcome, used as the {@code success} tag
     */
    public void recordRetry(String operation, int attempt, boolean success) {
        Counter.builder("socle_retry_attempts")
                .tag("operation", operation)
                .tag("attempt", String.valueOf(attempt))
                .tag("success", String.valueOf(success))
                .register(registry)
                .increment();
    }

    /**
     * Sets the {@code socle_circuit_breaker_state} gauge of a circuit:
     * 0 for CLOSED, 1 for HALF_OPEN, 2 otherwise.
     *
     * <p>The gauge is registered once per name and reads a mutable holder, so every later
     * call changes the value the gauge reports. A gauge built over the {@code state}
     * argument itself would keep reporting the first state it was given.
     *
     * @param name  circuit name, used as the {@code name} tag
     * @param state state to publish
     */
    public void recordCircuitBreakerState(String name, CircuitState state) {
        int code = state == CircuitState.CLOSED ? 0 :
                state == CircuitState.HALF_OPEN ? 1 : 2;
        AtomicInteger holder = circuitStates.computeIfAbsent(name, key -> {
            AtomicInteger created = new AtomicInteger(code);
            Gauge.builder("socle_circuit_breaker_state", created, AtomicInteger::doubleValue)
                    .tag("name", key)
                    .register(registry);
            return created;
        });
        holder.set(code);
    }
}
10. Bonnes pratiques
DO
- Utiliser le circuit breaker pour les appels réseau externes
- Configurer des timeouts appropriés
- Toujours prévoir un fallback
- Monitorer l’état des circuits
- Logger les transitions d’état
DON’T
- Ne pas utiliser le retry pour les erreurs non récupérables (400, 401)
- Ne pas configurer des seuils trop bas (faux positifs)
- Ne pas oublier les timeouts (risque de blocage)
- Ne pas ignorer les métriques
11. Références
- 05-WORKERS – Workers
- 09-PIPELINE – Pipeline
- Martin Fowler – Circuit Breaker
- Resilience4j

Laisser un commentaire