LangChain4j 并发处理与线程安全深度解析

一、LangChain4j 并发架构设计

1.1 并发模型概述

LangChain4j 采用多层级并发控制策略：

┌───────────────────────────────────────────────────┐
│ 应用层并发控制                                    │
├───────────────────────────────────────────────────┤
│ 线程池配置 | 请求队列 | 超时控制 | 熔断机制        │
├───────────────────────────────────────────────────┤
│ HTTP 客户端并发控制                                 │
├───────────────────────────────────────────────────┤
│ 连接池管理 | 连接复用 | 请求复用 | 流控机制        │
├───────────────────────────────────────────────────┤
│ 供应商 API 并发限制                                 │
├───────────────────────────────────────────────────┤
│ 速率限制 | 令牌限制 | 配额管理 | 优先级队列        │
└───────────────────────────────────────────────────┘

1.2 核心并发组件

// Key concurrency-related interfaces and classes discussed in this section.
// NOTE(review): these types do not appear to match the published LangChain4j API;
// treat them as an illustrative sketch — confirm against the library version in use.
public interface ConcurrentModel {
    // Asynchronous model interface: both overloads return a CompletableFuture.
    interface AsyncChatModel extends ChatLanguageModel {
        CompletableFuture<Response<String>> generateAsync(String prompt);
        CompletableFuture<Response<String>> generateAsync(List<ChatMessage> messages);
    }

    // Streaming model interface: the result is exposed as a Flux of responses.
    interface StreamingChatModel {
        Flux<Response<String>> generateStream(List<ChatMessage> messages);
    }

    // Batch interface: accepts several message lists and returns a list of responses.
    interface BatchChatModel {
        List<Response<String>> generateBatch(List<List<ChatMessage>> batches);
    }
}

二、并发请求处理机制详解

2.1 HTTP 客户端并发配置

package com.example.concurrent;

import com.google.common.util.concurrent.RateLimiter;
import dev.langchain4j.model.openai.OpenAiChatModel;
import okhttp3.*;
import okhttp3.dnsoverhttps.DnsOverHttps;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.Arrays;
import java.util.concurrent.*;

/**
 * Builds an OkHttp client configured for many concurrent calls: a shared connection
 * pool, a dispatcher with request caps, timeouts, retry and rate-limit interceptors,
 * TLS connection specs and DNS-over-HTTPS.
 *
 * <p>NOTE(review): this listing reached review with its keywords, declared names,
 * numeric and string literals, annotations and comments stripped. The structure
 * below follows the tokens that survived (method calls, {@code TimeUnit} units,
 * {@code shouldRetry}/{@code waitBeforeRetry}, {@code maxRetries},
 * {@code rateLimiter}). The class name, the factory method name and every constant
 * value are reviewer reconstructions — confirm them against the original source.
 */
public class HttpClientConcurrencyConfig {

    // NOTE(review): all values below are placeholders; the originals were lost.
    private static final int MAX_IDLE_CONNECTIONS = 50;
    private static final long KEEP_ALIVE_MINUTES = 5;
    private static final int MAX_REQUESTS = 100;
    private static final int MAX_REQUESTS_PER_HOST = 20;
    private static final long CONNECT_TIMEOUT_SECONDS = 30;
    private static final long WRITE_TIMEOUT_SECONDS = 60;
    private static final long READ_TIMEOUT_SECONDS = 120;
    private static final long CALL_TIMEOUT_SECONDS = 180;
    private static final int DEFAULT_MAX_RETRIES = 3;
    private static final double REQUESTS_PER_SECOND = 10.0;
    private static final long BASE_BACKOFF_MILLIS = 1000L;
    private static final String DOH_ENDPOINT = "https://dns.google/dns-query";

    /**
     * Creates the configured client.
     *
     * @return a new {@link OkHttpClient} with its own connection pool and dispatcher
     */
    public static OkHttpClient createOptimizedHttpClient() {
        // Connection pool: idle-connection cap and keep-alive duration.
        ConnectionPool connectionPool = new ConnectionPool(
            MAX_IDLE_CONNECTIONS,
            KEEP_ALIVE_MINUTES, TimeUnit.MINUTES
        );

        // Dispatcher: overall and per-host request caps.
        Dispatcher dispatcher = new Dispatcher();
        dispatcher.setMaxRequests(MAX_REQUESTS);
        dispatcher.setMaxRequestsPerHost(MAX_REQUESTS_PER_HOST);

        OkHttpClient.Builder builder = new OkHttpClient.Builder()
            .connectTimeout(CONNECT_TIMEOUT_SECONDS, TimeUnit.SECONDS)
            .writeTimeout(WRITE_TIMEOUT_SECONDS, TimeUnit.SECONDS)
            .readTimeout(READ_TIMEOUT_SECONDS, TimeUnit.SECONDS)
            .callTimeout(CALL_TIMEOUT_SECONDS, TimeUnit.SECONDS)
            .connectionPool(connectionPool)
            .dispatcher(dispatcher);

        // Interceptors run in registration order, so every retry attempt passes
        // through the rate limiter again.
        builder.addInterceptor(new RetryInterceptor(DEFAULT_MAX_RETRIES));
        builder.addInterceptor(new RateLimitInterceptor());
        // NOTE(review): the original registered a third interceptor here, but its
        // class is not defined anywhere in this listing. Restore the call once the
        // intended interceptor has been identified.

        builder.connectionSpecs(Arrays.asList(
            ConnectionSpec.MODERN_TLS,
            ConnectionSpec.COMPATIBLE_TLS
        ));

        // DnsOverHttps needs its own client to send the lookups; the original chain
        // had no .client(...) call, which makes build() fail.
        OkHttpClient bootstrapClient = new OkHttpClient.Builder().build();
        builder.dns(new DnsOverHttps.Builder()
            .client(bootstrapClient)
            .url(HttpUrl.get(DOH_ENDPOINT))
            .build());

        return builder.build();
    }

    /**
     * Retries a call when it fails with an {@link IOException} or returns a status
     * that {@link #shouldRetry} accepts, waiting with exponential backoff between
     * attempts.
     */
    private static class RetryInterceptor implements Interceptor {
        private final int maxRetries;

        /**
         * @param maxRetries number of retries after the first attempt; must not be negative
         * @throws IllegalArgumentException if {@code maxRetries} is negative
         */
        public RetryInterceptor(int maxRetries) {
            if (maxRetries < 0) {
                throw new IllegalArgumentException("maxRetries must be >= 0, got " + maxRetries);
            }
            this.maxRetries = maxRetries;
        }

        /**
         * Proceeds with the request, retrying up to {@code maxRetries} times.
         *
         * @return the first response that does not need a retry, or the last
         *         response received once retries are exhausted (still open)
         * @throws IOException the last I/O failure when every attempt failed, or an
         *         {@link InterruptedIOException} when the backoff wait is interrupted
         */
        @Override
        public Response intercept(Chain chain) throws IOException {
            Request request = chain.request();
            IOException lastFailure = null;

            for (int retryCount = 0; retryCount <= maxRetries; retryCount++) {
                boolean attemptsLeft = retryCount < maxRetries;
                try {
                    Response response = chain.proceed(request);
                    // Close a response only when another attempt will replace it.
                    // The original closed it first and then returned the closed
                    // response once retries were exhausted.
                    if (!attemptsLeft || !shouldRetry(response, retryCount)) {
                        return response;
                    }
                    response.close();
                } catch (IOException e) {
                    lastFailure = e;
                    if (!attemptsLeft) {
                        break;
                    }
                }
                waitBeforeRetry(retryCount);
            }
            throw lastFailure != null
                ? lastFailure
                : new IOException("Request failed after " + maxRetries + " retries");
        }

        /**
         * Decides from the status code whether the response should be retried.
         * NOTE(review): the three status codes were lost in extraction; 429, 5xx and
         * 408 are the reviewer's reading of the surviving
         * {@code code == … || code >= … || code == …} shape — confirm.
         */
        private boolean shouldRetry(Response response, int retryCount) {
            int code = response.code();
            return code == 429
                || code >= 500
                || code == 408;
        }

        /**
         * Sleeps for an exponentially growing delay before the next attempt.
         *
         * @throws InterruptedIOException if the thread is interrupted while waiting;
         *         the interrupt flag is restored so the caller stops retrying
         */
        private void waitBeforeRetry(int retryCount) throws InterruptedIOException {
            // The shift is capped so that a large retry count cannot overflow.
            long waitTime = BASE_BACKOFF_MILLIS << Math.min(retryCount, 10);
            try {
                Thread.sleep(waitTime);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                InterruptedIOException aborted = new InterruptedIOException("Interrupted while waiting to retry");
                aborted.initCause(e);
                throw aborted;
            }
        }
    }

    /**
     * Blocks each call until the shared rate limiter grants a permit.
     */
    private static class RateLimitInterceptor implements Interceptor {
        private final RateLimiter rateLimiter;

        public RateLimitInterceptor() {
            // NOTE(review): the original passed two stripped arguments to
            // RateLimiter.create; the single-rate form is used here — confirm
            // whether a warm-up period was intended.
            this.rateLimiter = RateLimiter.create(REQUESTS_PER_SECOND);
        }

        @Override
        public Response intercept(Chain chain) throws IOException {
            rateLimiter.acquire();
            return chain.proceed(chain.request());
        }
    }
}

package com.example.concurrent;

import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.data.message.UserMessage;
import dev.langchain4j.model.chat.ChatLanguageModel;
import dev.langchain4j.model.output.Response;

import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.*;
import java.util.stream.Stream;

/**
 * Chat model wrapper that bounds concurrency, applies a request timeout and guards
 * the delegate with a simple circuit breaker.
 *
 * <p>Every request runs as a single task on the internal pool. The concurrency
 * permit is acquired and released inside that task, so a thread holding a permit
 * never waits for another pool task. The rejection policy is
 * {@code CallerRunsPolicy}: when the queue and pool are both full, the submitting
 * thread runs the request itself.
 */
public class ConcurrentChatModel implements ChatLanguageModel {

    private static final long PERMIT_WAIT_SECONDS = 30;
    private static final long REQUEST_TIMEOUT_SECONDS = 120;

    private final ChatLanguageModel delegate;
    private final ThreadPoolExecutor executorService;
    private final Semaphore concurrencyLimiter;
    private final int maxConcurrency;
    private final AtomicLong requestCounter = new AtomicLong();
    // Aggregated latency of successful requests. This replaces a per-request map
    // that was never pruned and mixed start timestamps with durations.
    private final AtomicLong completedRequests = new AtomicLong();
    private final AtomicLong totalLatencyNanos = new AtomicLong();
    private final CircuitBreaker circuitBreaker;

    /**
     * @param delegate       model that performs the actual call
     * @param maxConcurrency number of permits and core pool size; the pool may grow
     *                       to twice this value once the queue is full
     * @param queueSize      capacity of the pool's work queue
     */
    public ConcurrentChatModel(ChatLanguageModel delegate, int maxConcurrency, int queueSize) {
        this.delegate = Objects.requireNonNull(delegate, "delegate");
        this.maxConcurrency = maxConcurrency;

        // 1. Thread pool with named daemon workers.
        AtomicInteger workerIds = new AtomicInteger();
        ThreadFactory workerFactory = task -> {
            Thread worker = new Thread(task, "ChatModel-Worker-" + workerIds.incrementAndGet());
            worker.setDaemon(true);
            return worker;
        };
        this.executorService = new ThreadPoolExecutor(
            maxConcurrency,                       // core threads
            maxConcurrency * 2,                   // maximum threads
            60L, TimeUnit.SECONDS,                // idle timeout
            new LinkedBlockingQueue<>(queueSize), // work queue
            workerFactory,
            new ThreadPoolExecutor.CallerRunsPolicy()
        );

        // 2. Concurrency limiter.
        this.concurrencyLimiter = new Semaphore(maxConcurrency);

        // 3. Circuit breaker.
        this.circuitBreaker = new CircuitBreaker(5, 10000, 0.5);
    }

    @Override
    public Response<String> generate(String prompt) {
        return generate(Collections.singletonList(UserMessage.from(prompt)));
    }

    /**
     * Runs one request and waits for it.
     *
     * @throws RuntimeException "Circuit breaker is open" when the breaker rejects
     *         the call, "Request timeout" after the request timeout, "Request
     *         failed" (with the underlying cause) when the request throws, or
     *         "Unexpected error" when the waiting thread is interrupted
     */
    @Override
    public Response<String> generate(List<ChatMessage> messages) {
        // Checked on the calling thread so a rejected call fails without queueing.
        if (!circuitBreaker.allowRequest()) {
            throw new RuntimeException("Circuit breaker is open");
        }
        return await(submit(messages));
    }

    /**
     * Issues the same request {@code n} times concurrently on the internal pool.
     *
     * @return the responses, or an empty list when {@code n} is not positive
     */
    @Override
    public List<Response<String>> generate(List<ChatMessage> messages, int n) {
        List<CompletableFuture<Response<String>>> pending = new ArrayList<>(Math.max(n, 0));
        for (int i = 0; i < n; i++) {
            pending.add(submit(messages));
        }
        List<Response<String>> responses = new ArrayList<>(pending.size());
        for (CompletableFuture<Response<String>> future : pending) {
            responses.add(await(future));
        }
        return responses;
    }

    /**
     * Returns the complete response as a single-element stream.
     *
     * <p>No incremental streaming is implemented in this class. The previous
     * placeholder iterator never terminated and produced only {@code null}.
     */
    @Override
    public Stream<Response<String>> generateStream(List<ChatMessage> messages) {
        return Stream.of(generate(messages));
    }

    /**
     * Asynchronous generation. Failures surface through the returned future.
     *
     * <p>The request runs directly as a pool task. The previous version ran the
     * blocking {@code generate} on the pool, which then queued a second task on the
     * same pool and could leave every worker waiting for a task that never ran.
     * The request timeout of {@link #generate(List)} is not applied here.
     */
    public CompletableFuture<Response<String>> generateAsync(List<ChatMessage> messages) {
        return submit(messages);
    }

    /**
     * Asynchronous batch generation; completes when every request has completed.
     */
    public CompletableFuture<List<Response<String>>> generateBatchAsync(List<List<ChatMessage>> batches) {
        List<CompletableFuture<Response<String>>> futures = new ArrayList<>(batches.size());
        for (List<ChatMessage> batch : batches) {
            futures.add(generateAsync(batch));
        }
        return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]))
            .thenApply(ignored -> {
                List<Response<String>> responses = new ArrayList<>(futures.size());
                for (CompletableFuture<Response<String>> future : futures) {
                    responses.add(future.join());
                }
                return responses;
            });
    }

    /**
     * Returns a snapshot of the wrapper's metrics.
     *
     * @return map with {@code totalRequests}, {@code activeRequests} (permits in
     *         use), {@code queueSize}, {@code activeThreads}, and
     *         {@code avgResponseTimeMs} once at least one request has succeeded
     */
    public Map<String, Object> getMetrics() {
        Map<String, Object> metrics = new HashMap<>();
        metrics.put("totalRequests", requestCounter.get());
        // Permits in use, not free permits.
        metrics.put("activeRequests", maxConcurrency - concurrencyLimiter.availablePermits());
        metrics.put("queueSize", executorService.getQueue().size());
        metrics.put("activeThreads", executorService.getActiveCount());

        long completed = completedRequests.get();
        if (completed > 0) {
            // Nanoseconds to milliseconds.
            metrics.put("avgResponseTimeMs", totalLatencyNanos.get() / (double) completed / 1_000_000.0);
        }
        return metrics;
    }

    /** Stops accepting new requests; already queued requests still run. */
    public void shutdown() {
        executorService.shutdown();
    }

    /** Queues one guarded delegate call, remembering when it was submitted. */
    private CompletableFuture<Response<String>> submit(List<ChatMessage> messages) {
        long submittedAtNanos = System.nanoTime();
        return CompletableFuture.supplyAsync(() -> invokeDelegate(messages, submittedAtNanos), executorService);
    }

    /** Waits for a submitted request and maps failures to the documented exceptions. */
    private Response<String> await(CompletableFuture<Response<String>> future) {
        try {
            return future.get(REQUEST_TIMEOUT_SECONDS, TimeUnit.SECONDS);
        } catch (TimeoutException e) {
            // Keeps a request that has not started yet from calling the delegate.
            future.cancel(true);
            circuitBreaker.recordFailure();
            throw new RuntimeException("Request timeout", e);
        } catch (ExecutionException e) {
            // The failure was already recorded inside the task.
            throw new RuntimeException("Request failed", e.getCause());
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            future.cancel(true);
            throw new RuntimeException("Unexpected error", e);
        }
    }

    /** Runs the delegate under the circuit breaker and the concurrency permit. */
    private Response<String> invokeDelegate(List<ChatMessage> messages, long submittedAtNanos) {
        if (!circuitBreaker.allowRequest()) {
            throw new RuntimeException("Circuit breaker is open");
        }
        boolean permitHeld = false;
        try {
            permitHeld = concurrencyLimiter.tryAcquire(PERMIT_WAIT_SECONDS, TimeUnit.SECONDS);
            if (!permitHeld) {
                throw new RuntimeException("Concurrency limit timeout");
            }
            requestCounter.incrementAndGet();
            Response<String> response = delegate.generate(messages);
            circuitBreaker.recordSuccess();
            completedRequests.incrementAndGet();
            totalLatencyNanos.addAndGet(System.nanoTime() - submittedAtNanos);
            return response;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new RuntimeException("Unexpected error", e);
        } catch (RuntimeException e) {
            // The only place a failed request is counted, so it is counted once.
            circuitBreaker.recordFailure();
            throw e;
        } finally {
            // Release only a permit that was actually acquired.
            if (permitHeld) {
                concurrencyLimiter.release();
            }
        }
    }

    /**
     * Minimal circuit breaker: opens after {@code failureThreshold} counted
     * failures and closes again once {@code recoveryTimeout} milliseconds have
     * passed since the last failure.
     */
    private static class CircuitBreaker {
        private final int failureThreshold;
        private final long recoveryTimeout;
        private final double failureRateThreshold;
        private final AtomicInteger failureCount = new AtomicInteger();
        private final AtomicInteger successCount = new AtomicInteger();
        private volatile long lastFailureTime = 0;
        private volatile boolean isOpen = false;

        public CircuitBreaker(int failureThreshold, long recoveryTimeout, double failureRateThreshold) {
            this.failureThreshold = failureThreshold;
            this.recoveryTimeout = recoveryTimeout;
            this.failureRateThreshold = failureRateThreshold;
        }

        /** Returns whether a request may proceed, closing the breaker after the recovery timeout. */
        public boolean allowRequest() {
            if (!isOpen) {
                return true;
            }
            boolean recoveryElapsed = System.currentTimeMillis() - lastFailureTime > recoveryTimeout;
            if (!recoveryElapsed) {
                return false;
            }
            isOpen = false;
            failureCount.set(0);
            successCount.set(0);
            return true;
        }

        /** Counts a success and clears the failure count while the failure rate is below the threshold. */
        public void recordSuccess() {
            successCount.incrementAndGet();
            if (calculateFailureRate() < failureRateThreshold) {
                failureCount.set(0);
            }
        }

        /** Counts a failure and opens the breaker once the threshold is reached. */
        public void recordFailure() {
            int failures = failureCount.incrementAndGet();
            lastFailureTime = System.currentTimeMillis();
            if (failures >= failureThreshold) {
                isOpen = true;
            }
        }

        private double calculateFailureRate() {
            int failures = failureCount.get();
            int total = successCount.get() + failures;
            return total > 0 ? (double) failures / total : 0.0;
        }
    }
}

package com.example.concurrent;

import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.model.chat.ChatLanguageModel;
import dev.langchain4j.model.output.Response;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Scheduler;
import reactor.core.scheduler.Schedulers;

import java.time.Duration;
import java.util.List;
import java.util.concurrent.CompletableFuture;

/**
 * Reactive wrapper around a blocking chat model. Blocking calls are moved onto a
 * dedicated bounded-elastic scheduler owned by this instance.
 */
public class ReactiveChatModel {
    private final ChatLanguageModel delegate;
    private final Scheduler scheduler;

    /**
     * @param delegate blocking model invoked by every reactive method
     */
    public ReactiveChatModel(ChatLanguageModel delegate) {
        this.delegate = delegate;
        // Dedicated scheduler for the blocking delegate calls.
        this.scheduler = Schedulers.newBoundedElastic(100, 1000, "ReactiveChatModel", 60, true);
    }

    /**
     * Runs the delegate on the scheduler with a 120-second timeout.
     *
     * @return a Mono that emits the response, or fails with a
     *         {@code RuntimeException("Generation failed")} wrapping the cause
     */
    public Mono<Response<String>> generateReactive(List<ChatMessage> messages) {
        return Mono.fromCallable(() -> delegate.generate(messages))
            .subscribeOn(scheduler)
            .timeout(Duration.ofSeconds(120))
            .onErrorResume(failure -> Mono.error(new RuntimeException("Generation failed", failure)));
    }

    /**
     * Emits a fixed set of simulated chunks with a short delay between them.
     * The {@code messages} argument is not used by this simulation.
     */
    public Flux<String> generateStreamReactive(List<ChatMessage> messages) {
        // The explicit <String> witness is required: without a target type the
        // chained create(...) call would infer Flux<Object>.
        return Flux.<String>create(sink -> {
            try {
                // Simulated streaming response.
                String[] chunks = {"Chunk1", "Chunk2", "Chunk3"};
                for (String chunk : chunks) {
                    if (sink.isCancelled()) {
                        break;
                    }
                    sink.next(chunk);
                    // Simulated processing delay.
                    try {
                        Thread.sleep(100);
                    } catch (InterruptedException e) {
                        // Restore the flag so the worker thread sees the interrupt.
                        Thread.currentThread().interrupt();
                        sink.error(e);
                        return;
                    }
                }
                sink.complete();
            } catch (Exception e) {
                sink.error(e);
            }
        }).subscribeOn(scheduler);
    }

    /**
     * Processes every batch through {@link #generateReactive} on parallel rails.
     * NOTE(review): responses are merged as rails produce them, so output order
     * presumably does not follow input order — confirm if callers depend on it.
     */
    public Flux<Response<String>> generateBatchReactive(List<List<ChatMessage>> batches) {
        return Flux.fromIterable(batches)
            .parallel()
            .runOn(Schedulers.parallel())
            .flatMap(this::generateReactive)
            .sequential();
    }

    /**
     * Buffers up to 100 pending inputs and runs at most 10 generations concurrently.
     */
    public Flux<Response<String>> generateWithBackpressure(Flux<List<ChatMessage>> messageFlux) {
        return messageFlux
            .onBackpressureBuffer(100)             // buffer size
            .flatMap(this::generateReactive, 10);  // maximum concurrency
    }

    /** Releases the scheduler created by this instance. */
    public void dispose() {
        scheduler.dispose();
    }
}

package com.example.threadsafe;

import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.*;
import java.util.concurrent.locks.*;
import java.util.function.Function;

/**
 * Worked examples of thread-safety patterns for LangChain4j-style components.
 *
 * <p>NOTE(review): {@code Session}, {@code Tool} and {@code findTool} are used
 * below but are not defined in this listing; they must be supplied before the
 * class compiles.
 */
public class ThreadSafetyAnalysis {

    /**
     * 1. Stateless component: it keeps no fields, so calls do not share state.
     */
    public static class StatelessComponent {
        public String process(String input) {
            return input.toUpperCase();
        }
    }

    /**
     * 2. Stateful component: contrasts an unsafe counter with an atomic one.
     */
    public static class StatefulComponent {
        // Problem (kept on purpose as the counter-example): a plain int counter.
        private int counter = 0;

        public int increment() {
            counter++; // read-modify-write is not atomic, so updates can be lost
            return counter;
        }

        // Solution 1: use an atomic class.
        private final AtomicInteger safeCounter = new AtomicInteger(0);

        public int incrementSafely() {
            return safeCounter.incrementAndGet();
        }
    }

    /**
     * 3. Thread-safe cache backed by a {@link ConcurrentHashMap}.
     *
     * <p>The map is the only synchronization needed. The earlier read/write lock
     * around {@code get} and {@code put} added nothing, and
     * {@code computeIfAbsent} never took it.
     */
    public static class ThreadSafeCache<K, V> {
        private final ConcurrentHashMap<K, V> cache = new ConcurrentHashMap<>();

        public V get(K key) {
            return cache.get(key);
        }

        public void put(K key, V value) {
            cache.put(key, value);
        }

        /** Atomically computes and stores the value when the key is absent. */
        public V computeIfAbsent(K key, Function<K, V> mappingFunction) {
            return cache.computeIfAbsent(key, mappingFunction);
        }
    }

    /**
     * 4. Session management.
     */
    public static class SessionManager {
        // Problem: a ThreadLocal that is never cleared can keep sessions alive on
        // pooled threads.
        private static final ThreadLocal<Session> threadLocalSession = new ThreadLocal<>();

        // Variant that creates a session on first access and closes it on remove().
        private static final ThreadLocal<Session> managedThreadLocalSession = new ThreadLocal<Session>() {
            @Override
            protected Session initialValue() {
                return new Session();
            }

            @Override
            public void remove() {
                // Close the current thread's session before dropping it. If none
                // exists yet, get() creates one through initialValue().
                super.get().close();
                super.remove();
            }
        };

        // Preferred approach: an explicit session id mapped in a concurrent map.
        private final ConcurrentHashMap<String, Session> sessionMap = new ConcurrentHashMap<>();

        /** Returns the session for the id, creating it atomically on first use. */
        public Session getSession(String sessionId) {
            return sessionMap.computeIfAbsent(sessionId, id -> new Session(id));
        }
    }

    /**
     * 5. Tool execution when the tools themselves may not be thread-safe.
     */
    public static class ToolExecutor {
        private final List<Tool> tools;
        private final ReentrantLock toolLock = new ReentrantLock();

        // Preferred approach: one lock per tool name.
        private final ConcurrentHashMap<String, ReentrantLock> toolLocks = new ConcurrentHashMap<>();

        /**
         * @param tools tools available to this executor (the field was previously
         *              declared final but never assigned)
         */
        public ToolExecutor(List<Tool> tools) {
            this.tools = tools;
        }

        /** Executes the named tool while holding one lock shared by all tools. */
        public Object executeTool(String toolName, Object input) {
            toolLock.lock();
            try {
                Tool tool = findTool(toolName);
                return tool.execute(input);
            } finally {
                toolLock.unlock();
            }
        }

        /** Executes the named tool while holding a lock specific to that tool name. */
        public Object executeToolWithFineGrainedLock(String toolName, Object input) {
            ReentrantLock lock = toolLocks.computeIfAbsent(toolName, name -> new ReentrantLock());
            lock.lock();
            try {
                Tool tool = findTool(toolName);
                return tool.execute(input);
            } finally {
                lock.unlock();
            }
        }
    }
}

package com.example.threadsafe;

import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.memory.ChatMemory;
import dev.langchain4j.memory.chat.MessageWindowChatMemory;
import org.springframework.dao.DataAccessException;
import org.springframework.data.redis.core.RedisOperations;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.data.redis.core.SessionCallback;

import java.util.ArrayList;
import java.util.List;

/**
 * Thread-safety analysis of ChatMemory, with two ways to make it safe: a
 * synchronized wrapper and a Redis-backed implementation.
 */
public class ChatMemoryThreadSafety {

    /**
     * Demonstrates wrapping an in-memory ChatMemory for multi-threaded use.
     */
    public static void analyzeInMemoryChatMemory() {
        // 1. Per the article, MessageWindowChatMemory is not thread-safe.
        ChatMemory memory = MessageWindowChatMemory.withMaxMessages(10);

        // Problems the article lists for concurrent use:
        // - ConcurrentModificationException
        // - lost messages
        // - inconsistent state

        // Solution: use a synchronizing wrapper.
        ChatMemory synchronizedMemory = synchronizeChatMemory(memory);
    }

    /**
     * Wraps a ChatMemory so that every operation runs under one private lock.
     *
     * @param delegate memory to guard; it must not be used directly afterwards
     * @return a wrapper whose {@code messages()} returns a copy taken under the lock
     */
    public static ChatMemory synchronizeChatMemory(ChatMemory delegate) {
        return new ChatMemory() {
            private final Object lock = new Object();

            @Override
            public String id() {
                synchronized (lock) {
                    return delegate.id();
                }
            }

            @Override
            public void add(ChatMessage message) {
                synchronized (lock) {
                    delegate.add(message);
                }
            }

            @Override
            public List<ChatMessage> messages() {
                synchronized (lock) {
                    return new ArrayList<>(delegate.messages());
                }
            }

            @Override
            public void clear() {
                synchronized (lock) {
                    delegate.clear();
                }
            }
        };
    }

    /**
     * ChatMemory stored in Redis under {@code chat:memory:<sessionId>}.
     *
     * <p>Appends use WATCH/MULTI/EXEC as an optimistic lock and are retried when
     * the transaction is aborted.
     */
    public static class RedisChatMemory implements ChatMemory {
        private static final int MAX_TRANSACTION_ATTEMPTS = 5;

        private final String sessionId;
        private final RedisTemplate<String, Object> redisTemplate;

        public RedisChatMemory(String sessionId, RedisTemplate<String, Object> redisTemplate) {
            this.sessionId = sessionId;
            this.redisTemplate = redisTemplate;
        }

        @Override
        public String id() {
            return sessionId;
        }

        /**
         * Appends a message to the stored list.
         *
         * @throws IllegalStateException if the transaction is aborted on every attempt
         */
        @Override
        public void add(ChatMessage message) {
            String key = key();
            for (int attempt = 0; attempt < MAX_TRANSACTION_ATTEMPTS; attempt++) {
                List<Object> results = redisTemplate.execute(new SessionCallback<List<Object>>() {
                    @Override
                    @SuppressWarnings({"unchecked", "rawtypes"})
                    public List<Object> execute(RedisOperations operations) throws DataAccessException {
                        operations.watch(key);
                        List<ChatMessage> stored = (List<ChatMessage>) operations.opsForValue().get(key);
                        List<ChatMessage> updated = stored == null ? new ArrayList<>() : new ArrayList<>(stored);
                        updated.add(message);
                        operations.multi();
                        operations.opsForValue().set(key, updated);
                        return operations.exec();
                    }
                });
                // NOTE(review): an aborted transaction (watched key changed) is
                // expected to come back as null or an empty list — confirm for the
                // Spring Data Redis version in use. The original ignored the result,
                // so a conflicting writer silently dropped the message.
                if (results != null && !results.isEmpty()) {
                    return;
                }
            }
            throw new IllegalStateException(
                "Could not append message to " + key + " after " + MAX_TRANSACTION_ATTEMPTS + " attempts");
        }

        /** Returns a copy of the stored messages, or an empty list when none are stored. */
        @Override
        @SuppressWarnings("unchecked")
        public List<ChatMessage> messages() {
            List<ChatMessage> stored = (List<ChatMessage>) redisTemplate.opsForValue().get(key());
            return stored == null ? new ArrayList<>() : new ArrayList<>(stored);
        }

        /** Deletes the stored messages for this session. */
        @Override
        public void clear() {
            redisTemplate.delete(key());
        }

        private String key() {
            return "chat:memory:" + sessionId;
        }
    }
}

package com.example.threadsafe;

import com.google.common.util.concurrent.RateLimiter;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.output.Response;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Concurrent handling for EmbeddingModel.
 */
public class EmbeddingModelConcurrency {

    /**
     * EmbeddingModel wrapper that fans requests out over its own thread pool and
     * throttles them with one shared rate limiter.
     */
    public static class ThreadSafeEmbeddingModel implements EmbeddingModel {
        private static final int BATCH_SIZE = 100;

        private final EmbeddingModel delegate;
        private final ExecutorService executorService;
        private final RateLimiter rateLimiter;

        /**
         * @param delegate       model that performs the actual embedding calls
         * @param maxConcurrency core pool size; the pool may grow to twice this
         *                       value once its queue of 1000 tasks is full
         */
        public ThreadSafeEmbeddingModel(EmbeddingModel delegate, int maxConcurrency) {
            this.delegate = delegate;

            // Daemon workers, so an instance that is never shut down does not keep
            // the JVM alive.
            AtomicInteger workerIds = new AtomicInteger();
            ThreadFactory workerFactory = task -> {
                Thread worker = new Thread(task, "Embedding-Worker-" + workerIds.incrementAndGet());
                worker.setDaemon(true);
                return worker;
            };
            this.executorService = new ThreadPoolExecutor(
                maxConcurrency,
                maxConcurrency * 2,
                60L, TimeUnit.SECONDS,
                new LinkedBlockingQueue<>(1000),
                workerFactory,
                new ThreadPoolExecutor.CallerRunsPolicy()
            );

            // Rate limiter (set according to the API limit): 100 permits per second.
            this.rateLimiter = RateLimiter.create(100.0);
        }

        /** Embeds one text by delegating to {@link #embedAll(List)}. */
        @Override
        public Response<Embedding> embed(String text) {
            // Wrap the embedding: the original returned the bare Embedding, which
            // does not match the declared Response<Embedding>.
            return Response.from(embedAll(Collections.singletonList(text)).content().get(0));
        }

        /**
         * Embeds every text with one delegate call per text, run concurrently.
         *
         * @return embeddings in input order; an empty list for empty input
         */
        @Override
        public Response<List<Embedding>> embedAll(List<String> texts) {
            if (texts.isEmpty()) {
                // acquire(0) would be rejected by the rate limiter.
                return Response.from(new ArrayList<>());
            }

            // Throttle by the number of texts.
            rateLimiter.acquire(texts.size());

            List<CompletableFuture<Response<Embedding>>> pending = new ArrayList<>(texts.size());
            for (String text : texts) {
                pending.add(CompletableFuture.supplyAsync(() -> delegate.embed(text), executorService));
            }

            List<Embedding> embeddings = new ArrayList<>(texts.size());
            for (CompletableFuture<Response<Embedding>> future : pending) {
                embeddings.add(future.join().content());
            }
            return Response.from(embeddings);
        }

        /**
         * Embeds the texts in batches of 100, one delegate {@code embedAll} call per
         * batch. Batches run on this wrapper's pool, not the common fork-join pool,
         * and all of them draw from the same rate limiter.
         *
         * @return embeddings in input order; an empty list for empty input
         */
        public Response<List<Embedding>> embedAllOptimized(List<String> texts) {
            List<CompletableFuture<List<Embedding>>> pending = new ArrayList<>();
            for (int from = 0; from < texts.size(); from += BATCH_SIZE) {
                List<String> batch = texts.subList(from, Math.min(from + BATCH_SIZE, texts.size()));
                pending.add(CompletableFuture.supplyAsync(() -> {
                    rateLimiter.acquire(batch.size());
                    return delegate.embedAll(batch).content();
                }, executorService));
            }

            List<Embedding> allEmbeddings = new ArrayList<>(texts.size());
            for (CompletableFuture<List<Embedding>> future : pending) {
                allEmbeddings.addAll(future.join());
            }
            return Response.from(allEmbeddings);
        }

        /** Stops accepting new work; already submitted tasks still run. */
        public void shutdown() {
            executorService.shutdown();
        }
    }
}

package com.example.bestpractice;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultConnectionKeepAliveStrategy;
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.pool.PoolStats;

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

/**
 * HTTP connection pool best practices (Apache HttpClient 4.x).
 */
public class ConnectionPoolBestPractice {

    /**
     * Builds a pooled client with explicit limits, timeouts and a retry handler.
     *
     * @return a new client backed by its own pooling connection manager
     */
    public static CloseableHttpClient createOptimalHttpClient() {
        // 1. Connection manager.
        PoolingHttpClientConnectionManager connectionManager = new PoolingHttpClientConnectionManager();
        connectionManager.setMaxTotal(200);           // total connections
        connectionManager.setDefaultMaxPerRoute(50);  // connections per route
        // Re-validate a pooled connection before reuse once it has been idle for
        // 30 s. This is not a connection lifetime.
        connectionManager.setValidateAfterInactivity(30000);

        // 2. Request configuration.
        RequestConfig requestConfig = RequestConfig.custom()
            .setConnectTimeout(30000)           // connect timeout
            .setSocketTimeout(60000)            // socket timeout
            .setConnectionRequestTimeout(5000)  // wait for a pooled connection
            .build();

        // 3. Client.
        return HttpClientBuilder.create()
            .setConnectionManager(connectionManager)
            .setDefaultRequestConfig(requestConfig)
            // NOTE(review): the second argument enables retry of requests that
            // were already sent; confirm this is acceptable for non-idempotent calls.
            .setRetryHandler(new DefaultHttpRequestRetryHandler(3, true))
            .disableCookieManagement()
            .setKeepAliveStrategy(new DefaultConnectionKeepAliveStrategy())
            .build();
    }

    /**
     * Prints pool statistics every 30 seconds and evicts expired and idle
     * connections. Monitoring starts in the constructor and runs until
     * {@link #shutdown()} is called.
     */
    public static class ConnectionPoolMonitor {
        private static final int LOW_HEADROOM_THRESHOLD = 10;

        private final PoolingHttpClientConnectionManager connectionManager;
        private final ScheduledExecutorService monitorExecutor;

        /**
         * @param connectionManager pool to observe and clean up
         */
        public ConnectionPoolMonitor(PoolingHttpClientConnectionManager connectionManager) {
            this.connectionManager = connectionManager;
            // Daemon thread: a forgotten monitor must not keep the JVM alive.
            this.monitorExecutor = Executors.newSingleThreadScheduledExecutor(task -> {
                Thread monitor = new Thread(task, "ConnectionPool-Monitor");
                monitor.setDaemon(true);
                return monitor;
            });
            startMonitoring();
        }

        /** Stops the periodic monitoring task. */
        public void shutdown() {
            monitorExecutor.shutdownNow();
        }

        private void startMonitoring() {
            monitorExecutor.scheduleAtFixedRate(this::reportAndEvict, 0, 30, TimeUnit.SECONDS);
        }

        /** One monitoring pass: print statistics, warn on low headroom, evict stale connections. */
        private void reportAndEvict() {
            try {
                PoolStats stats = connectionManager.getTotalStats();
                System.out.println("=== Connection Pool Stats ===");
                System.out.println("Available: " + stats.getAvailable());
                System.out.println("Leased: " + stats.getLeased());
                System.out.println("Pending: " + stats.getPending());
                System.out.println("Max: " + stats.getMax());

                // Warn on remaining capacity or waiting callers. getAvailable()
                // counts idle pooled connections, so a low value there does not
                // mean the pool is close to its limit.
                int headroom = stats.getMax() - stats.getLeased();
                if (headroom < LOW_HEADROOM_THRESHOLD || stats.getPending() > 0) {
                    System.err.println("WARNING: Low available connections!");
                }

                // Close expired and long-idle connections.
                connectionManager.closeExpiredConnections();
                connectionManager.closeIdleConnections(60, TimeUnit.SECONDS);
            } catch (Exception e) {
                // Catch everything: an escaping exception would cancel the schedule.
                System.err.println("Monitor error: " + e.getMessage());
            }
        }
    }
}

package com.example.bestpractice;

import reactor.core.publisher.BufferOverflowStrategy;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Schedulers;
import reactor.util.retry.Retry;

import java.time.Duration;
import java.util.List;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.stream.Collectors;

/**
 * Asynchronous programming best practices.
 */
public class AsyncBestPractice {

    /**
     * 1. Creates a bounded pool sized from the number of available processors.
     *
     * <p>Core size is the processor count and the maximum is twice that. The queue
     * holds 10000 tasks, and further submissions are rejected with
     * {@code AbortPolicy} so that saturation is visible to the caller.
     */
    public static ExecutorService createOptimalThreadPool() {
        int corePoolSize = Runtime.getRuntime().availableProcessors();
        int maxPoolSize = corePoolSize * 2;

        AtomicInteger workerIds = new AtomicInteger();
        ThreadFactory workerFactory = task -> {
            Thread worker = new Thread(task, "LLM-Worker-" + workerIds.incrementAndGet());
            worker.setDaemon(true);
            worker.setUncaughtExceptionHandler((thread, throwable) ->
                System.err.println("Uncaught exception in " + thread.getName() + ": " + throwable.getMessage()));
            return worker;
        };

        return new ThreadPoolExecutor(
            corePoolSize,
            maxPoolSize,
            60L, TimeUnit.SECONDS,
            new LinkedBlockingQueue<>(10000),
            workerFactory,
            new ThreadPoolExecutor.AbortPolicy() // reject explicitly so bottlenecks surface
        );
    }

    /**
     * 2. CompletableFuture best practices.
     */
    public static class CompletableFutureBestPractice {

        // One daemon timer thread fires all timeouts, so no pool thread has to
        // sleep for the duration of each timeout.
        private static final ScheduledExecutorService TIMEOUT_SCHEDULER =
            Executors.newSingleThreadScheduledExecutor(task -> {
                Thread timer = new Thread(task, "LLM-Timeout");
                timer.setDaemon(true);
                return timer;
            });

        /**
         * Returns a future that completes with {@code future}'s result, or fails
         * with a {@link TimeoutException} if the timeout elapses first. The
         * argument future is not cancelled on timeout.
         *
         * @param future  operation to bound
         * @param timeout maximum time to wait
         * @param unit    unit of {@code timeout}
         */
        public CompletableFuture<String> processWithTimeout(CompletableFuture<String> future, long timeout, TimeUnit unit) {
            CompletableFuture<String> timeoutFuture = new CompletableFuture<>();
            ScheduledFuture<?> timer = TIMEOUT_SCHEDULER.schedule(() -> {
                timeoutFuture.completeExceptionally(new TimeoutException("Operation timeout"));
            }, timeout, unit);

            // Whichever completes first wins; the timer is cancelled once the
            // outcome is known.
            return future.applyToEither(timeoutFuture, Function.identity())
                .whenComplete((value, error) -> timer.cancel(false));
        }

        /**
         * Collects the results of the given futures while letting at most
         * {@code maxConcurrency} waiter tasks block on them at a time.
         *
         * <p>NOTE(review): the futures are passed in already created, so the
         * semaphore bounds only how many are awaited at once. It cannot limit
         * work that has already been started elsewhere.
         *
         * @throws IllegalArgumentException if {@code maxConcurrency} is below 1
         */
        public CompletableFuture<List<String>> processBatchWithConcurrencyLimit(List<CompletableFuture<String>> futures, int maxConcurrency) {
            if (maxConcurrency < 1) {
                // Zero permits would block every waiter forever.
                throw new IllegalArgumentException("maxConcurrency must be >= 1, got " + maxConcurrency);
            }
            Semaphore semaphore = new Semaphore(maxConcurrency);

            List<CompletableFuture<String>> controlledFutures = futures.stream()
                .map(future -> CompletableFuture.supplyAsync(() -> awaitWithPermit(semaphore, future)))
                .collect(Collectors.toList());

            return CompletableFuture.allOf(controlledFutures.toArray(new CompletableFuture[0]))
                .thenApply(ignored -> controlledFutures.stream()
                    .map(CompletableFuture::join)
                    .collect(Collectors.toList()));
        }

        /** Waits for one future while holding a permit. */
        private static String awaitWithPermit(Semaphore semaphore, CompletableFuture<String> future) {
            boolean permitHeld = false;
            try {
                semaphore.acquire();
                permitHeld = true;
                return future.get();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            } catch (ExecutionException e) {
                throw new RuntimeException(e);
            } finally {
                // Release only an acquired permit; releasing after an interrupted
                // acquire() would raise the limit.
                if (permitHeld) {
                    semaphore.release();
                }
            }
        }
    }

    /**
     * 3. Reactive programming best practices.
     */
    public static class ReactiveBestPractice {

        /**
         * Buffers up to 1000 items, dropping the newest on overflow, and publishes
         * them on the parallel scheduler. The three hooks are empty placeholders.
         */
        public Flux<String> processWithBackpressure(Flux<String> source) {
            return source
                .onBackpressureBuffer(1000, BufferOverflowStrategy.DROP_LATEST) // buffer size, overflow strategy
                .publishOn(Schedulers.parallel())                               // scheduler
                .doOnNext(item -> {
                    // handle each element
                })
                .doOnError(error -> {
                    // error handling
                })
                .doOnComplete(() -> {
                    // completion handling
                });
        }

        /**
         * Retries up to 3 times with backoff (1 s initial, 10 s cap, 0.5 jitter),
         * applies a 30-second timeout and falls back to {@code "fallback"} on error.
         */
        public Mono<String> processWithRetry(Mono<String> mono) {
            return mono
                .retryWhen(Retry.backoff(3, Duration.ofSeconds(1))
                    .maxBackoff(Duration.ofSeconds(10))
                    .jitter(0.5)
                    .doAfterRetry(retrySignal -> {
                        // after-retry hook
                    }))
                .timeout(Duration.ofSeconds(30))
                .onErrorResume(error -> Mono.just("fallback")); // error recovery
        }
    }
}

package com.example.monitoring;

import io.micrometer.core.instrument.*;
import io.micrometer.core.instrument.binder.jvm.*;
import io.micrometer.core.instrument.binder.system.*;
import io.micrometer.prometheus.PrometheusConfig;
import io.micrometer.prometheus.PrometheusMeterRegistry;

import java.util.concurrent.*;
import java.util.concurrent.atomic.*;

/**
 * Performance monitoring for LangChain4j calls, backed by a Prometheus registry.
 */
public class PerformanceMonitor {
    // Kept as the concrete type so scrape() needs no cast.
    private final PrometheusMeterRegistry meterRegistry;
    private final ConcurrentHashMap<String, Timer> timers;
    private final AtomicLong totalRequests;
    private final AtomicLong failedRequests;
    private final AtomicLong activeRequests;

    public PerformanceMonitor() {
        this.meterRegistry = new PrometheusMeterRegistry(PrometheusConfig.DEFAULT);
        this.timers = new ConcurrentHashMap<>();
        this.totalRequests = new AtomicLong();
        this.failedRequests = new AtomicLong();
        this.activeRequests = new AtomicLong();

        registerMetrics();
    }

    /** Binds the JVM and system binders and registers the request gauges. */
    private void registerMetrics() {
        // JVM metrics.
        new JvmMemoryMetrics().bindTo(meterRegistry);
        new JvmGcMetrics().bindTo(meterRegistry);
        new JvmThreadMetrics().bindTo(meterRegistry);

        // System metrics. ProcessorMetrics is bound once; it was previously bound twice.
        new UptimeMetrics().bindTo(meterRegistry);
        new ProcessorMetrics().bindTo(meterRegistry);

        // Custom metrics.
        Gauge.builder("langchain4j.requests.total", totalRequests, AtomicLong::get)
            .description("Total number of requests")
            .register(meterRegistry);

        Gauge.builder("langchain4j.requests.active", activeRequests, AtomicLong::get)
            .description("Active number of requests")
            .register(meterRegistry);

        Gauge.builder("langchain4j.requests.failed", failedRequests, AtomicLong::get)
            .description("Failed number of requests")
            .register(meterRegistry);
    }

    /**
     * Marks a request as active and starts timing it.
     *
     * @param operation currently unused; the operation is supplied again when the
     *                  sample is recorded
     * @return sample to pass to {@link #recordSuccess} or {@link #recordFailure}
     */
    public Timer.Sample startTimer(String operation) {
        activeRequests.incrementAndGet();
        return Timer.start(meterRegistry);
    }

    /** Records a successful request under the operation's success timer. */
    public void recordSuccess(Timer.Sample sample, String operation) {
        totalRequests.incrementAndGet();
        activeRequests.decrementAndGet();

        Timer timer = timers.computeIfAbsent(operation, op ->
            Timer.builder("langchain4j.operation.duration")
                .tag("operation", op)
                .tag("status", "success")
                .publishPercentiles(0.5, 0.95, 0.99)
                .register(meterRegistry));
        sample.stop(timer);
    }

    /**
     * Records a failed request under a timer tagged with the operation and error.
     *
     * @param error value of the {@code error} tag
     *              NOTE(review): presumably a short error class or code; free-form
     *              messages would create one time series per distinct text — confirm.
     */
    public void recordFailure(Timer.Sample sample, String operation, String error) {
        totalRequests.incrementAndGet();
        failedRequests.incrementAndGet();
        activeRequests.decrementAndGet();

        // The cache key includes the error. Keyed by operation alone, every later
        // error was recorded under the first error's tag.
        String cacheKey = operation + ".error." + error;
        Timer timer = timers.computeIfAbsent(cacheKey, key ->
            Timer.builder("langchain4j.operation.duration")
                .tag("operation", operation)
                .tag("status", "error")
                .tag("error", error)
                .register(meterRegistry));
        sample.stop(timer);
    }

    /** Adds the token counts to the input and output token counters. */
    public void recordTokenUsage(int inputTokens, int outputTokens) {
        Counter.builder("langchain4j.tokens.total")
            .tag("type", "input")
            .register(meterRegistry)
            .increment(inputTokens);

        Counter.builder("langchain4j.tokens.total")
            .tag("type", "output")
            .register(meterRegistry)
            .increment(outputTokens);
    }

    /** Returns the registry contents in Prometheus text format. */
    public String getMetrics() {
        return meterRegistry.scrape();
    }
}

package com.example.testing;

import dev.langchain4j.model.chat.ChatLanguageModel;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

import java.util.concurrent.*;
import java.util.concurrent.atomic.*;

/**
 * JMH benchmark for concurrent LangChain4j requests.
 *
 * <p>NOTE(review): {@code createChatModel()} is called in {@link #setup()} but is
 * not defined in this listing; it must be supplied before the class compiles.
 */
@State(Scope.Benchmark)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.SECONDS)
@Warmup(iterations = 3, time = 5)
@Measurement(iterations = 5, time = 10)
@Fork(2)
@Threads(10) // concurrent benchmark threads
public class ConcurrentPerformanceTest {
    private ChatLanguageModel model;
    private ExecutorService executorService;
    private AtomicInteger successCount;
    private AtomicInteger failureCount;

    @Setup
    public void setup() {
        // Model under test.
        model = createChatModel();

        // Worker pool shared by all benchmark threads.
        executorService = Executors.newFixedThreadPool(50);

        successCount = new AtomicInteger();
        failureCount = new AtomicInteger();
    }

    /**
     * Submits 100 requests to the pool and waits up to 30 seconds for them.
     *
     * <p>The latch is a local variable. This state object is shared by all
     * benchmark threads, so a latch kept in a field was overwritten by concurrent
     * invocations and tasks counted down a latch belonging to another invocation.
     */
    @Benchmark
    @BenchmarkMode({Mode.Throughput, Mode.AverageTime})
    public void testConcurrentRequests() throws InterruptedException {
        int concurrentRequests = 100;
        CountDownLatch done = new CountDownLatch(concurrentRequests);

        for (int i = 0; i < concurrentRequests; i++) {
            executorService.submit(() -> {
                try {
                    model.generate("Test message " + Thread.currentThread().getId());
                    successCount.incrementAndGet();
                } catch (Exception e) {
                    failureCount.incrementAndGet();
                } finally {
                    done.countDown();
                }
            });
        }

        // NOTE(review): a timed-out wait is not reported; requests still running
        // then overlap with the next invocation.
        done.await(30, TimeUnit.SECONDS);
    }

    /** Shuts the pool down and prints the success and failure totals. */
    @TearDown
    public void tearDown() {
        executorService.shutdown();
        try {
            // Let in-flight requests finish so the totals below are final.
            if (!executorService.awaitTermination(10, TimeUnit.SECONDS)) {
                executorService.shutdownNow();
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            executorService.shutdownNow();
        }

        int succeeded = successCount.get();
        int failed = failureCount.get();
        int total = succeeded + failed;

        System.out.println("Success: " + succeeded);
        System.out.println("Failure: " + failed);
        // Guard the division: with no finished request the rate is undefined.
        System.out.println("Success rate: "
            + (total > 0 ? String.valueOf((double) succeeded / total) : "n/a (no completed requests)"));
    }

    public static void main(String[] args) throws RunnerException {
        Options options = new OptionsBuilder()
            .include(ConcurrentPerformanceTest.class.getSimpleName())
            .build();

        new Runner(options).run();
    }
}

LangChain4j 并发处理与线程安全深度解析