{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":547806116,"defaultBranch":"main","name":"text-generation-inference","ownerLogin":"huggingface","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2022-10-08T10:26:28.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/25720743?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1718212341.0","currentOid":""},"activityList":{"items":[{"before":null,"after":"c1e052830181c882e57dc875bbf620627e6f8b44","ref":"refs/heads/derive-sharded-from-args","pushedAt":"2024-06-12T17:12:21.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"drbh","name":"drbh","path":"/drbh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/9896130?s=80&v=4"},"commit":{"message":"fix: set sharded true if WORLD_SIZE is set","shortMessageHtmlLink":"fix: set sharded true if WORLD_SIZE is set"}},{"before":"05eb4dcb1739868b63ed540fea9129b52a2242b5","after":"abe521204ec97030f43e55409b960c404a8c3fa2","ref":"refs/heads/feat/page_re_alloc","pushedAt":"2024-06-12T16:54:30.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"OlivierDehaene","name":null,"path":"/OlivierDehaene","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23298448?s=80&v=4"},"commit":{"message":"fix tests","shortMessageHtmlLink":"fix tests"}},{"before":"9ac7b7bc521495b3b6335240d9b3311c79a47c7f","after":"05eb4dcb1739868b63ed540fea9129b52a2242b5","ref":"refs/heads/feat/page_re_alloc","pushedAt":"2024-06-12T16:53:37.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"OlivierDehaene","name":null,"path":"/OlivierDehaene","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23298448?s=80&v=4"},"commit":{"message":"allocate 16 by 16","shortMessageHtmlLink":"allocate 16 by 16"}},{"before":"82302262caa3ece3049ff6f061aa155bc64ab4ea","after":null,"ref":"refs/heads/fix/phi_mini","pushedAt":"2024-06-12T16:24:48.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"OlivierDehaene","name":null,"path":"/OlivierDehaene","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23298448?s=80&v=4"}},{"before":"521de6cacd2af42caa1f93c75a34460a6ecddf9e","after":"90184df79c12ec2aa9111248077e237ca2ba9ee9","ref":"refs/heads/main","pushedAt":"2024-06-12T16:24:47.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"OlivierDehaene","name":null,"path":"/OlivierDehaene","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23298448?s=80&v=4"},"commit":{"message":"fix(layers): fix SuRotaryEmbedding (#2060)\n\n* fix(layers): fix SuRotaryEmbedding\r\n\r\n* change arange\r\n\r\n* remove logs","shortMessageHtmlLink":"fix(layers): fix SuRotaryEmbedding (#2060)"}},{"before":"bbebdffa6ae55933715a19bcfb328f702c191106","after":null,"ref":"refs/heads/fix/old_implem","pushedAt":"2024-06-12T16:22:21.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"OlivierDehaene","name":null,"path":"/OlivierDehaene","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23298448?s=80&v=4"}},{"before":"376a0b7ada91548a68798383cb008ea01c728b30","after":"521de6cacd2af42caa1f93c75a34460a6ecddf9e","ref":"refs/heads/main","pushedAt":"2024-06-12T16:22:21.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"OlivierDehaene","name":null,"path":"/OlivierDehaene","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23298448?s=80&v=4"},"commit":{"message":"fix(server): fix OPT implementation (#2061)","shortMessageHtmlLink":"fix(server): fix OPT implementation (#2061)"}},{"before":"c2fb459bc1a3a207308243f5fcc32bf6781618d0","after":"9ac7b7bc521495b3b6335240d9b3311c79a47c7f","ref":"refs/heads/feat/page_re_alloc","pushedAt":"2024-06-12T16:21:16.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"OlivierDehaene","name":null,"path":"/OlivierDehaene","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23298448?s=80&v=4"},"commit":{"message":"remove slots from grpc","shortMessageHtmlLink":"remove slots from grpc"}},{"before":null,"after":"bbebdffa6ae55933715a19bcfb328f702c191106","ref":"refs/heads/fix/old_implem","pushedAt":"2024-06-12T16:11:31.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"OlivierDehaene","name":null,"path":"/OlivierDehaene","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23298448?s=80&v=4"},"commit":{"message":"fix(server): fix OPT implementation","shortMessageHtmlLink":"fix(server): fix OPT implementation"}},{"before":"9775facbf798c3e85a9b73aa83bd5d066cf82533","after":"82302262caa3ece3049ff6f061aa155bc64ab4ea","ref":"refs/heads/fix/phi_mini","pushedAt":"2024-06-12T15:50:57.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"OlivierDehaene","name":null,"path":"/OlivierDehaene","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23298448?s=80&v=4"},"commit":{"message":"remove logs","shortMessageHtmlLink":"remove logs"}},{"before":"9cc16725bf41c860b17cfc6699b827008ae625d1","after":"9775facbf798c3e85a9b73aa83bd5d066cf82533","ref":"refs/heads/fix/phi_mini","pushedAt":"2024-06-12T15:47:50.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"OlivierDehaene","name":null,"path":"/OlivierDehaene","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23298448?s=80&v=4"},"commit":{"message":"change arange","shortMessageHtmlLink":"change arange"}},{"before":null,"after":"9cc16725bf41c860b17cfc6699b827008ae625d1","ref":"refs/heads/fix/phi_mini","pushedAt":"2024-06-12T15:09:19.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"OlivierDehaene","name":null,"path":"/OlivierDehaene","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23298448?s=80&v=4"},"commit":{"message":"fix(layers): fix SuRotaryEmbedding","shortMessageHtmlLink":"fix(layers): fix SuRotaryEmbedding"}},{"before":null,"after":"4ed551abbaa6d177f99787a81cdeec5168332525","ref":"refs/heads/feature/phi-3-small","pushedAt":"2024-06-12T15:04:28.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"danieldk","name":"Daniël de Kok","path":"/danieldk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49398?s=80&v=4"},"commit":{"message":"WIP, many bits are still missing...\n\nSo this won't work yet.","shortMessageHtmlLink":"WIP, many bits are still missing..."}},{"before":null,"after":"c0f201c9d3f71f5f7aa1caf7c6fe7cf50c46d477","ref":"refs/heads/maintenance/packed-sharded-refactor","pushedAt":"2024-06-12T14:24:43.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"danieldk","name":"Daniël de Kok","path":"/danieldk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49398?s=80&v=4"},"commit":{"message":"Factor out sharding of packed tensors\n\nFor Phi-3-Small I need to shard a packed QKV bias tensor, for which\nI implemented the `Weights.get_packed_sharded` method. However, this\nmethod can also replace the `Weights._get_qweight` method and the\ncustom sharding code from `Weights.get_weights_col_packed`.","shortMessageHtmlLink":"Factor out sharding of packed tensors"}},{"before":"b9b5051abc197e5e13e47054591103e62fd06791","after":null,"ref":"refs/heads/bugfix/preserve-quantized-dtype","pushedAt":"2024-06-12T13:00:21.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"danieldk","name":"Daniël de Kok","path":"/danieldk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49398?s=80&v=4"}},{"before":"f0187efe776dc6657720055cb5335b7be93899fe","after":"fd32b586423e82b4e93b77da28e7a18eba1ca22f","ref":"refs/heads/feature/marlin-gptq","pushedAt":"2024-06-12T11:59:09.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"danieldk","name":"Daniël de Kok","path":"/danieldk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49398?s=80&v=4"},"commit":{"message":"Add support for GPTQ Marlin kernels\n\nGPTQ Marlin extends the Marlin kernels to support common GPTQ\nconfigurations:\n\n- bits: 4 or 8\n- groupsize: -1, 32, 64, or 128\n- desc_act: true/false\n\nUsing the GPTQ Marlin kernels requires repacking the parameters in the\nMarlin quantizer format.\n\nThe kernels were contributed by Neural Magic to VLLM. We vendor them\nhere for convenience.","shortMessageHtmlLink":"Add support for GPTQ Marlin kernels"}},{"before":"5ef9d0b1bfbdc3126149b78b82ed80439cdc751c","after":"f0187efe776dc6657720055cb5335b7be93899fe","ref":"refs/heads/feature/marlin-gptq","pushedAt":"2024-06-12T10:19:11.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"danieldk","name":"Daniël de Kok","path":"/danieldk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49398?s=80&v=4"},"commit":{"message":"Some renaming","shortMessageHtmlLink":"Some renaming"}},{"before":"e6e87a2e2699293c9ca9d93fa6a836c4a5d176b3","after":"884ebabfd361f961767893b2ab908971c6cb46b2","ref":"refs/heads/pr-2049-ci-run","pushedAt":"2024-06-11T18:46:37.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"drbh","name":"drbh","path":"/drbh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/9896130?s=80&v=4"},"commit":{"message":"fix: cargo fmt lint for pre commit","shortMessageHtmlLink":"fix: cargo fmt lint for pre commit"}},{"before":"37266e2dbb8b5f52cff80cf837a5e479c51172fe","after":"c2fb459bc1a3a207308243f5fcc32bf6781618d0","ref":"refs/heads/feat/page_re_alloc","pushedAt":"2024-06-11T16:47:50.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"OlivierDehaene","name":null,"path":"/OlivierDehaene","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23298448?s=80&v=4"},"commit":{"message":"fix windowing","shortMessageHtmlLink":"fix windowing"}},{"before":"3512f0765751694d41b24a13254c691a61cc877d","after":"5ef9d0b1bfbdc3126149b78b82ed80439cdc751c","ref":"refs/heads/feature/marlin-gptq","pushedAt":"2024-06-11T15:59:36.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"danieldk","name":"Daniël de Kok","path":"/danieldk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49398?s=80&v=4"},"commit":{"message":"bugfix","shortMessageHtmlLink":"bugfix"}},{"before":"73c39032142f7bc7243402e7ec9fa9f25e4bbcb3","after":"37266e2dbb8b5f52cff80cf837a5e479c51172fe","ref":"refs/heads/feat/page_re_alloc","pushedAt":"2024-06-11T15:14:58.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"OlivierDehaene","name":null,"path":"/OlivierDehaene","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23298448?s=80&v=4"},"commit":{"message":"fix rust and python unit-tests","shortMessageHtmlLink":"fix rust and python unit-tests"}},{"before":"705fc637e72df58ea52fc6daecd77ee6e1d11178","after":"e6e87a2e2699293c9ca9d93fa6a836c4a5d176b3","ref":"refs/heads/pr-2049-ci-run","pushedAt":"2024-06-11T14:46:59.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"drbh","name":"drbh","path":"/drbh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/9896130?s=80&v=4"},"commit":{"message":"Use minijinja's pycompat mode for python methods","shortMessageHtmlLink":"Use minijinja's pycompat mode for python methods"}},{"before":"4ce8494ceb1df3b30ed00f2082a84733db0ff5eb","after":null,"ref":"refs/heads/support-chat-response-format","pushedAt":"2024-06-11T14:44:56.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"drbh","name":"drbh","path":"/drbh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/9896130?s=80&v=4"}},{"before":"a6e4d63c86f4eeaae2ba1337a39f19d03bbd2277","after":"376a0b7ada91548a68798383cb008ea01c728b30","ref":"refs/heads/main","pushedAt":"2024-06-11T14:44:56.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"drbh","name":"drbh","path":"/drbh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/9896130?s=80&v=4"},"commit":{"message":"Support chat response format (#2046)\n\n* feat: support response_format in chat\r\n\r\n* fix: adjust typos\r\n\r\n* fix: add trufflehog lint","shortMessageHtmlLink":"Support chat response format (#2046)"}},{"before":"a6e4d63c86f4eeaae2ba1337a39f19d03bbd2277","after":"705fc637e72df58ea52fc6daecd77ee6e1d11178","ref":"refs/heads/pr-2049-ci-run","pushedAt":"2024-06-11T14:00:56.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"drbh","name":"drbh","path":"/drbh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/9896130?s=80&v=4"},"commit":{"message":"Merge commit 'refs/pull/2049/head' of github.com:huggingface/text-generation-inference into pr-2049-ci-run","shortMessageHtmlLink":"Merge commit 'refs/pull/2049/head' of github.com:huggingface/text-gen…"}},{"before":null,"after":"a6e4d63c86f4eeaae2ba1337a39f19d03bbd2277","ref":"refs/heads/pr-2049-ci-run","pushedAt":"2024-06-11T14:00:03.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"drbh","name":"drbh","path":"/drbh","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/9896130?s=80&v=4"},"commit":{"message":"Update LLMM1 bound (#2050)\n\nupdate commit","shortMessageHtmlLink":"Update LLMM1 bound (#2050)"}},{"before":"dadfff621efc6582a7aee358b0faadf6844c8f4c","after":"7c7470542d2a9aa55fcdcb8de03e36a8689b4ca0","ref":"refs/heads/ci_amd2","pushedAt":"2024-06-11T13:40:38.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"fxmarty","name":null,"path":"/fxmarty","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/9808326?s=80&v=4"},"commit":{"message":"fix tests","shortMessageHtmlLink":"fix tests"}},{"before":"6b68dbdb107dbf1145de97ad9e53754ad6cba3f1","after":"3512f0765751694d41b24a13254c691a61cc877d","ref":"refs/heads/feature/marlin-gptq","pushedAt":"2024-06-11T13:26:20.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"danieldk","name":"Daniël de Kok","path":"/danieldk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49398?s=80&v=4"},"commit":{"message":"Add support for GPTQ Marlin","shortMessageHtmlLink":"Add support for GPTQ Marlin"}},{"before":"9de5b348a6790913ee6bee1dd535a679314efc9a","after":"6b68dbdb107dbf1145de97ad9e53754ad6cba3f1","ref":"refs/heads/feature/marlin-gptq","pushedAt":"2024-06-11T12:57:32.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"danieldk","name":"Daniël de Kok","path":"/danieldk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49398?s=80&v=4"},"commit":{"message":"Add support for GPTQ Marlin","shortMessageHtmlLink":"Add support for GPTQ Marlin"}},{"before":"0a2f2eb2ae3068f6798d6a96a0cfd542ff9b9f96","after":"9de5b348a6790913ee6bee1dd535a679314efc9a","ref":"refs/heads/feature/marlin-gptq","pushedAt":"2024-06-11T12:18:20.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"danieldk","name":"Daniël de Kok","path":"/danieldk","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49398?s=80&v=4"},"commit":{"message":"Fix up a bunch of models, tighten checks","shortMessageHtmlLink":"Fix up a bunch of models, tighten checks"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEY5KwngA","startCursor":null,"endCursor":null}},"title":"Activity · huggingface/text-generation-inference"}