command:
python ./start_up.py \
--replicas_ip_port "127.0.0.1:8081,127.0.0.1:8082,127.0.0.1:8083,127.0.0.1:8084,127.0.0.1:8085,127.0.0.1:8086,127.0.0.1:8087,127.0.0.1:8088" \
--model_path "/local/transformer_models/Qwen3-32B-FP8" \
--model_name "Qwen3-32B" \
--max_model_len 20480 \
--block_size 128 \
--kv_cache_size_per_token 57344 \
--replica_dram_size 64 \
--qps 10 \
--prefill_tpot 0.00016 \
--global_scheduler_type "cache_affinity" \
--ttft_slo 5 \
--request_num 8000 \
--dataset_file "./evaluation/dataset/mooncake/processed_toolagent_trace.jsonl" \
--dataset_type "toolagent"
output:
DEBUG 04-15 08:51:42 start_up.py:659] === Start experiment cache_affinity /10.0 ===
INFO 04-15 08:51:42 launcher.py:42] init_router: cache_affinity
INFO 04-15 08:51:42 request_generator.py:140] Generating requests from file: ./evaluation/dataset/mooncake/processed_toolagent_trace.jsonl
INFO 04-15 08:51:42 request_generator.py:143] Reading file: ./evaluation/dataset/mooncake/processed_toolagent_trace.jsonl
INFO 04-15 08:51:42 request_generator.py:97] Generating request: 0, 1776220873.811906@01@0, 1776220873.811906@0, 1
INFO 04-15 08:51:42 cache_affinity_global_scheduler.py:24] schedule: 1776220873.811906@0 -> 3
INFO 04-15 08:51:42 shared.py:117] Adding task for request 0 to replica 3,session_id=1
INFO 04-15 08:51:42 open_ai.py:70] async_send_request:request._id=0,session=1,127.0.0.1:8084
DEBUG 04-15 08:51:42 replica.py:154] add_request - replica_id=[3], request=0,self.replica_slo_budget-sum_pending_tokens==current_budget:True,15624-6758==8866
data: {'id': 'f0f60bf4e84e49d2a39b95091f57fbcd', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fp', 'created': 1776243102, 'model': 'Qwen3-32B', 'choices': [{'index': 0, 'delta': {'role': 'assistant', 'content': '', 'reasoning_content': None, 'tool_calls': None}, 'logprobs': None, 'finish_reason': None, 'matched_stop': None}], 'usage': None}
DEBUG 04-15 08:51:42 open_ai.py:155] async_send_request:Exception: 127.0.0.1:8084: Traceback (most recent call last):
DEBUG 04-15 08:51:42 open_ai.py:155] File "/local/DualMap/dualmap/client/open_ai.py", line 126, in async_send_request
DEBUG 04-15 08:51:42 open_ai.py:155] output_token_len = data["usage"]["completion_tokens"]
DEBUG 04-15 08:51:42 open_ai.py:155] ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^
DEBUG 04-15 08:51:42 open_ai.py:155] TypeError: 'NoneType' object is not subscriptable
DEBUG 04-15 08:51:42 open_ai.py:155]
DEBUG 04-15 08:51:42 open_ai.py:186] failed:request._id=0,session=1,127.0.0.1:8084
DEBUG 04-15 08:51:42 open_ai.py:187] bad request:127.0.0.1:8084:good,request._id=0,input=6758,output=500
DEBUG 04-15 08:51:42 open_ai.py:188] output failed:request._id=0,session=1,127.0.0.1:8084
DEBUG 04-15 08:51:42 replica.py:173] abort_request - Pending request IDs:replica_id=[3], request=0: []
DEBUG 04-15 08:51:42 replica.py:175] abort_request - pending_tokens:replica_id=[3], request=0, num_pending_req=0: []
DEBUG 04-15 08:51:42 replica.py:176] abort_request - replica_id=[3], request=0,self.replica_slo_budget-sum_pending_tokens==current_budget:True,15624-0==15624
command:
python ./start_up.py
output:
DEBUG 04-15 08:51:42 start_up.py:659] === Start experiment cache_affinity /10.0 ===
INFO 04-15 08:51:42 launcher.py:42] init_router: cache_affinity
INFO 04-15 08:51:42 request_generator.py:140] Generating requests from file: ./evaluation/dataset/mooncake/processed_toolagent_trace.jsonl
INFO 04-15 08:51:42 request_generator.py:143] Reading file: ./evaluation/dataset/mooncake/processed_toolagent_trace.jsonl
INFO 04-15 08:51:42 request_generator.py:97] Generating request: 0, 1776220873.811906@01@0, 1776220873.811906@0, 1
INFO 04-15 08:51:42 cache_affinity_global_scheduler.py:24] schedule: 1776220873.811906@0 -> 3
INFO 04-15 08:51:42 shared.py:117] Adding task for request 0 to replica 3,session_id=1
INFO 04-15 08:51:42 open_ai.py:70] async_send_request:request._id=0,session=1,127.0.0.1:8084
DEBUG 04-15 08:51:42 replica.py:154] add_request - replica_id=[3], request=0,self.replica_slo_budget-sum_pending_tokens==current_budget:True,15624-6758==8866
data: {'id': 'f0f60bf4e84e49d2a39b95091f57fbcd', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fp', 'created': 1776243102, 'model': 'Qwen3-32B', 'choices': [{'index': 0, 'delta': {'role': 'assistant', 'content': '', 'reasoning_content': None, 'tool_calls': None}, 'logprobs': None, 'finish_reason': None, 'matched_stop': None}], 'usage': None}
DEBUG 04-15 08:51:42 open_ai.py:155] async_send_request:Exception: 127.0.0.1:8084: Traceback (most recent call last):
DEBUG 04-15 08:51:42 open_ai.py:155] File "/local/DualMap/dualmap/client/open_ai.py", line 126, in async_send_request
DEBUG 04-15 08:51:42 open_ai.py:155] output_token_len = data["usage"]["completion_tokens"]
DEBUG 04-15 08:51:42 open_ai.py:155] ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^
DEBUG 04-15 08:51:42 open_ai.py:155] TypeError: 'NoneType' object is not subscriptable
DEBUG 04-15 08:51:42 open_ai.py:155]
DEBUG 04-15 08:51:42 open_ai.py:186] failed:request._id=0,session=1,127.0.0.1:8084
DEBUG 04-15 08:51:42 open_ai.py:187] bad request:127.0.0.1:8084:good,request._id=0,input=6758,output=500
DEBUG 04-15 08:51:42 open_ai.py:188] output failed:request._id=0,session=1,127.0.0.1:8084
DEBUG 04-15 08:51:42 replica.py:173] abort_request - Pending request IDs:replica_id=[3], request=0: []
DEBUG 04-15 08:51:42 replica.py:175] abort_request - pending_tokens:replica_id=[3], request=0, num_pending_req=0: []
DEBUG 04-15 08:51:42 replica.py:176] abort_request - replica_id=[3], request=0,self.replica_slo_budget-sum_pending_tokens==current_budget:True,15624-0==15624