{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":599547518,"defaultBranch":"main","name":"vllm","ownerLogin":"vllm-project","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-02-09T11:23:20.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/136984999?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1718218386.0","currentOid":""},"activityList":{"items":[{"before":"94a07bbdd813a0121d01a852ab03fb2430e73548","after":"7d19de2e9c9a94658c36b55011b803a7991d0335","ref":"refs/heads/main","pushedAt":"2024-06-12T22:42:13.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"mgoin","name":"Michael Goin","path":"/mgoin","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/3195154?s=80&v=4"},"commit":{"message":"[Frontend] Add \"input speed\" to tqdm postfix alongside output speed (#5425)","shortMessageHtmlLink":"[Frontend] Add \"input speed\" to tqdm postfix alongside output speed (#…"}},{"before":"b8d4dfff9c29ad6e02bce1fc79c089120b2d34d6","after":"94a07bbdd813a0121d01a852ab03fb2430e73548","ref":"refs/heads/main","pushedAt":"2024-06-12T21:59:44.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Bugfix] Fix typo in scheduler.py (requeset -> request) (#5470)","shortMessageHtmlLink":"[Bugfix] Fix typo in scheduler.py (requeset -> request) (#5470)"}},{"before":"622d45128c02e5296e1177481c65199754eab396","after":"b8d4dfff9c29ad6e02bce1fc79c089120b2d34d6","ref":"refs/heads/main","pushedAt":"2024-06-12T21:49:31.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"youkaichao","name":"youkaichao","path":"/youkaichao","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23236638?s=80&v=4"},"commit":{"message":"[Doc] Update debug docs (#5438)","shortMessageHtmlLink":"[Doc] Update debug docs (#5438)"}},{"before":"51602eefd38250325e541abd28f051ffd7676c3f","after":"622d45128c02e5296e1177481c65199754eab396","ref":"refs/heads/main","pushedAt":"2024-06-12T21:46:36.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"youkaichao","name":"youkaichao","path":"/youkaichao","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23236638?s=80&v=4"},"commit":{"message":"[misc] add hint for AttributeError (#5462)","shortMessageHtmlLink":"[misc] add hint for AttributeError (#5462)"}},{"before":"5cc50a531f720758025c8493ee85a56272277a54","after":"51602eefd38250325e541abd28f051ffd7676c3f","ref":"refs/heads/main","pushedAt":"2024-06-12T21:13:52.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"ywang96","name":"Roger Wang","path":"/ywang96","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/136131678?s=80&v=4"},"commit":{"message":" [Frontend] [Core] Support for sharded tensorized models (#4990)\n\nSigned-off-by: Travis Johnson \r\nCo-authored-by: Sanger Steel \r\nCo-authored-by: Roger Wang ","shortMessageHtmlLink":" [Frontend] [Core] Support for sharded tensorized models (#4990)"}},{"before":"5985e3427dc4a10b8483fd08013fa8df563f04fb","after":"5cc50a531f720758025c8493ee85a56272277a54","ref":"refs/heads/main","pushedAt":"2024-06-12T21:08:52.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Bugfix] TYPE_CHECKING for MultiModalData (#5444)","shortMessageHtmlLink":"[Bugfix] TYPE_CHECKING for MultiModalData (#5444)"}},{"before":"8b82a89997826af8e0e4ecfaaed60f3b28b1baed","after":"5985e3427dc4a10b8483fd08013fa8df563f04fb","ref":"refs/heads/main","pushedAt":"2024-06-12T21:07:26.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"pcmoritz","name":"Philipp Moritz","path":"/pcmoritz","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/113316?s=80&v=4"},"commit":{"message":"[Kernel] Vectorized FP8 quantize kernel (#5396)\n\nInspired by #5146, this PR improves FP8 quantize kernel by vectorizing data transfer to better utilize memory bandwidth. Microbenchmark shows that this improved kernel can achieve 1.0x-1.5x speedup (especially when hidden size is large).\r\n\r\nIn details, we applied 3 optimizations:\r\n\r\n- Use inverted scale so that most divisions are changed to multiplications.\r\n- Unroll the loop by 4 times to improve ILP.\r\n- Use vectorized 4 to transfer data between HBM and SRAM.","shortMessageHtmlLink":"[Kernel] Vectorized FP8 quantize kernel (#5396)"}},{"before":"c3c2903e72c6e85a81ff6de8b879f4c82e8ad364","after":"8b82a89997826af8e0e4ecfaaed60f3b28b1baed","ref":"refs/heads/main","pushedAt":"2024-06-12T21:00:18.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[ci] Add AMD, Neuron, Intel tests for AWS CI and turn off default soft fail for GPU tests (#5464)\n\nSigned-off-by: kevin ","shortMessageHtmlLink":"[ci] Add AMD, Neuron, Intel tests for AWS CI and turn off default sof…"}},{"before":"1a8bfd92d5f35d638e3cfc8c4cd1779aeda0adfb","after":"c3c2903e72c6e85a81ff6de8b879f4c82e8ad364","ref":"refs/heads/main","pushedAt":"2024-06-12T19:58:53.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"[Bugfix] Add device assertion to TorchSDPA (#5402)","shortMessageHtmlLink":"[Bugfix] Add device assertion to TorchSDPA (#5402)"}},{"before":"f5e1bf5d44877149eaabf9c04379a4e14a023145","after":null,"ref":"refs/heads/torch-xla","pushedAt":"2024-06-12T18:53:06.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"}},{"before":"847cdcca1c94b12e6c118dbf863e4b111d1b4fd2","after":"1a8bfd92d5f35d638e3cfc8c4cd1779aeda0adfb","ref":"refs/heads/main","pushedAt":"2024-06-12T18:53:03.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"[Hardware] Initial TPU integration (#5292)","shortMessageHtmlLink":"[Hardware] Initial TPU integration (#5292)"}},{"before":"034b9bdb099d501959b3fa4877298b1a32429712","after":"f5e1bf5d44877149eaabf9c04379a4e14a023145","ref":"refs/heads/torch-xla","pushedAt":"2024-06-12T18:49:30.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"Add build dependencies","shortMessageHtmlLink":"Add build dependencies"}},{"before":"b4aa40347f49ac785b5243c7b5b1ca0b7f421f84","after":"034b9bdb099d501959b3fa4877298b1a32429712","ref":"refs/heads/torch-xla","pushedAt":"2024-06-12T18:40:31.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"Fix for v5p","shortMessageHtmlLink":"Fix for v5p"}},{"before":"4be5a3c71244246d68f0b29e34b283679bb3d6a1","after":"b4aa40347f49ac785b5243c7b5b1ca0b7f421f84","ref":"refs/heads/torch-xla","pushedAt":"2024-06-12T18:03:42.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"Address comments","shortMessageHtmlLink":"Address comments"}},{"before":"e3c12bf6d22999cfbe267a7c788f6875340616cd","after":"847cdcca1c94b12e6c118dbf863e4b111d1b4fd2","ref":"refs/heads/main","pushedAt":"2024-06-12T17:06:14.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[CI] Upgrade codespell version. (#5381)","shortMessageHtmlLink":"[CI] Upgrade codespell version. (#5381)"}},{"before":"3dd6853bc8c4fb8bbaf507c1699e5cbe8fa356ad","after":"e3c12bf6d22999cfbe267a7c788f6875340616cd","ref":"refs/heads/main","pushedAt":"2024-06-12T17:03:24.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"Revert \"[CI/Build] Add `is_quant_method_supported` to control quantization test configurations\" (#5463)","shortMessageHtmlLink":"Revert \"[CI/Build] Add is_quant_method_supported to control quantiz…"}},{"before":null,"after":"500d0f092a99fc2b25054f6aaf8e8d32273f811a","ref":"refs/heads/revert-5253-refactor-checking-supported-quant-method","pushedAt":"2024-06-12T17:03:02.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"Revert \"[CI/Build] Add `is_quant_method_supported` to control quantization te…\"\n\nThis reverts commit 3dd6853bc8c4fb8bbaf507c1699e5cbe8fa356ad.","shortMessageHtmlLink":"Revert \"[CI/Build] Add is_quant_method_supported to control quantiz…"}},{"before":"8f89d72090da70895d77d32248ea8504f7daba50","after":"3dd6853bc8c4fb8bbaf507c1699e5cbe8fa356ad","ref":"refs/heads/main","pushedAt":"2024-06-12T16:58:02.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"robertgshaw2-neuralmagic","name":"Robert Shaw","path":"/robertgshaw2-neuralmagic","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/114415538?s=80&v=4"},"commit":{"message":"[CI/Build] Add `is_quant_method_supported` to control quantization test configurations (#5253)","shortMessageHtmlLink":"[CI/Build] Add is_quant_method_supported to control quantization te…"}},{"before":"cb5e4f63f8fec98fc09f40ee6e4075167499fe68","after":"4be5a3c71244246d68f0b29e34b283679bb3d6a1","ref":"refs/heads/torch-xla","pushedAt":"2024-06-11T18:16:29.000Z","pushType":"push","commitsCount":32,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"Merge branch 'main' into torch-xla","shortMessageHtmlLink":"Merge branch 'main' into torch-xla"}},{"before":"99dac099ab5205d40bfaf5cf5652884b8764a400","after":"8f89d72090da70895d77d32248ea8504f7daba50","ref":"refs/heads/main","pushedAt":"2024-06-11T18:12:14.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Doc] add common case for long waiting time (#5430)","shortMessageHtmlLink":"[Doc] add common case for long waiting time (#5430)"}},{"before":"c4bd03c7c5672b6a5d3d6839339853e04fe15127","after":"99dac099ab5205d40bfaf5cf5652884b8764a400","ref":"refs/heads/main","pushedAt":"2024-06-11T18:10:42.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Core][Doc] Default to multiprocessing for single-node distributed case (#5230)\n\nCo-authored-by: Antoni Baum ","shortMessageHtmlLink":"[Core][Doc] Default to multiprocessing for single-node distributed ca…"}},{"before":"dcbf4286afbff55d836b1c69bd2b4705f0082ddb","after":"c4bd03c7c5672b6a5d3d6839339853e04fe15127","ref":"refs/heads/main","pushedAt":"2024-06-11T17:54:00.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Core][Distributed] add same-node detection (#5369)","shortMessageHtmlLink":"[Core][Distributed] add same-node detection (#5369)"}},{"before":"0e0de1c7d53e62d9c3aa0fff88ef20c882ab848e","after":"cb5e4f63f8fec98fc09f40ee6e4075167499fe68","ref":"refs/heads/torch-xla","pushedAt":"2024-06-11T17:47:04.000Z","pushType":"push","commitsCount":5,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"Use enforce-eager to skip warmup","shortMessageHtmlLink":"Use enforce-eager to skip warmup"}},{"before":"00e6a2dc535c89ac7c92551ef9b92acd8664df02","after":"dcbf4286afbff55d836b1c69bd2b4705f0082ddb","ref":"refs/heads/main","pushedAt":"2024-06-11T17:42:26.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Frontend] Customizable RoPE theta (#5197)","shortMessageHtmlLink":"[Frontend] Customizable RoPE theta (#5197)"}},{"before":"2e02311a1b33b4fc21179813c56b444f1be10d53","after":"00e6a2dc535c89ac7c92551ef9b92acd8664df02","ref":"refs/heads/main","pushedAt":"2024-06-11T17:40:23.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Bugfix] fix lora_dtype value type in arg_utils.py (#5398)","shortMessageHtmlLink":"[Bugfix] fix lora_dtype value type in arg_utils.py (#5398)"}},{"before":"89ec06c33b9b3e64a6562a82049de44cbf7f9e09","after":"2e02311a1b33b4fc21179813c56b444f1be10d53","ref":"refs/heads/main","pushedAt":"2024-06-11T17:38:07.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Bugfix] Fix `MultiprocessingGPUExecutor.check_health` when world_size == 1 (#5254)","shortMessageHtmlLink":"[Bugfix] Fix MultiprocessingGPUExecutor.check_health when world_siz…"}},{"before":"9fde251bf0b5262ce31eec78f851346b32f684da","after":"89ec06c33b9b3e64a6562a82049de44cbf7f9e09","ref":"refs/heads/main","pushedAt":"2024-06-11T17:31:56.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Docs] [Spec decode] Fix docs error in code example (#5427)","shortMessageHtmlLink":"[Docs] [Spec decode] Fix docs error in code example (#5427)"}},{"before":"4c2ffb28ffe7270b49ac7cf5324978950a28e7e1","after":"9fde251bf0b5262ce31eec78f851346b32f684da","ref":"refs/heads/main","pushedAt":"2024-06-11T17:24:59.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Doc] Add an automatic prefix caching section in vllm documentation (#5324)\n\nCo-authored-by: simon-mo ","shortMessageHtmlLink":"[Doc] Add an automatic prefix caching section in vllm documentation (#…"}},{"before":"246598a6b1e22616630b7f1bf11bd9bcb31dc860","after":"4c2ffb28ffe7270b49ac7cf5324978950a28e7e1","ref":"refs/heads/main","pushedAt":"2024-06-11T17:15:40.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"LiuXiaoxuanPKU","name":"Lily Liu","path":"/LiuXiaoxuanPKU","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16137495?s=80&v=4"},"commit":{"message":"[Speculative decoding] Initial spec decode docs (#5400)","shortMessageHtmlLink":"[Speculative decoding] Initial spec decode docs (#5400)"}},{"before":"8bab4959bea640f8f81ca59eb06b1f056ac23111","after":"246598a6b1e22616630b7f1bf11bd9bcb31dc860","ref":"refs/heads/main","pushedAt":"2024-06-11T08:28:51.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"ywang96","name":"Roger Wang","path":"/ywang96","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/136131678?s=80&v=4"},"commit":{"message":"[CI] docfix (#5410)\n\nCo-authored-by: DarkLight1337 \r\nCo-authored-by: ywang96 ","shortMessageHtmlLink":"[CI] docfix (#5410)"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEY8-5tQA","startCursor":null,"endCursor":null}},"title":"Activity · vllm-project/vllm"}