{"payload":{"pageCount":3,"repositories":[{"type":"Public","name":"ChartAst","owner":"OpenGVLab","isFork":false,"description":" ChartAssistant is a chart-based vision-language model for universal chart comprehension and reasoning.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":12,"starsCount":100,"forksCount":8,"license":"Other","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,26,3,1,0,0,3,2,5,0,1,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,1,1,3,1,0,0,2],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-07T09:31:16.410Z"}},{"type":"Public","name":"Ask-Anything","owner":"OpenGVLab","isFork":false,"description":"[CVPR2024 Highlight][VideoChatGPT] ChatGPT with video understanding! And many more supported LMs such as miniGPT4, StableLM, and MOSS.","allTopics":["chat","video","gradio","big-model","video-understanding","captioning-videos","video-question-answering","foundation-models","large-model","large-language-models","chatgpt","langchain","stablelm"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":4,"issueCount":102,"starsCount":2971,"forksCount":244,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,0,0,11,5,3,3,0,0,1,1,0,2,0,0,0,4,0,1,2,0,7,0,0,0,0,0,0,11,2,5,4,4,1,3,0,0,0,1,1,3,2,0,13],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-05T01:56:42.312Z"}},{"type":"Public","name":"EgoExoLearn","owner":"OpenGVLab","isFork":false,"description":"[CVPR 2024] Data and benchmark code for the EgoExoLearn dataset","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":43,"forksCount":0,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,12,0,0,1,2,0,0,0,0,1,0,0,0,0,0,0,2,0,0,3,4,0,0,1],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-03T07:37:14.096Z"}},{"type":"Public","name":"OmniCorpus","owner":"OpenGVLab","isFork":false,"description":"OmniCorpus: A Unified Multimodal Corpus of 10 Billion-Level Images Interleaved with Text","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":243,"forksCount":5,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,3,1,1,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-29T11:28:12.678Z"}},{"type":"Public","name":"InternVideo","owner":"OpenGVLab","isFork":false,"description":"[ECCV2024] Video Foundation Models & Data for Multimodal Understanding","allTopics":["benchmark","action-recognition","video-understanding","video-data","self-supervised","multimodal","video-dataset","open-set-recognition","video-retrieval","video-question-answering","masked-autoencoder","temporal-action-localization","contrastive-learning","spatio-temporal-action-localization","zero-shot-retrieval","video-clip","vision-transformer","zero-shot-classification","foundation-models","instruction-tuning"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":3,"issueCount":81,"starsCount":1291,"forksCount":84,"license":"Apache License 2.0","participation":[1,0,0,0,0,0,11,2,0,0,2,2,1,0,0,0,0,0,0,7,2,0,0,0,0,0,0,0,9,2,5,3,9,2,0,2,2,2,4,0,0,0,0,1,0,4,0,13,9,4,1,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-27T12:08:53.030Z"}},{"type":"Public","name":"EfficientQAT","owner":"OpenGVLab","isFork":false,"description":"EfficientQAT: Efficient Quantization-Aware Training for Large Language 
Models","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":4,"starsCount":177,"forksCount":13,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,2,2,1,3,1,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-23T12:05:34.689Z"}},{"type":"Public","name":"Vision-RWKV","owner":"OpenGVLab","isFork":false,"description":"Vision-RWKV: Efficient and Scalable Visual Perception with RWKV-Like Architectures","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":15,"starsCount":325,"forksCount":14,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,4,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-21T06:03:18.378Z"}},{"type":"Public","name":"InternGPT","owner":"OpenGVLab","isFork":false,"description":"InternGPT (iGPT) is an open source demo platform where you can easily showcase your AI models. Now it supports DragGAN, ChatGPT, ImageBind, multimodal chat like GPT-4, SAM, interactive image editing, etc. Try it at igpt.opengvlab.com (支持DragGAN、ChatGPT、ImageBind、SAM的在线Demo系统)","allTopics":["sam","click","vqa","image-captioning","llama","gpt","gradio","husky","multimodal","video-generation","vicuna","gpt-4","llm","chatgpt","langchain","foundation-model","segment-anything","internimage","imagebind","draggan"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":17,"starsCount":3188,"forksCount":231,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-20T12:51:03.109Z"}},{"type":"Public","name":"InternVL","owner":"OpenGVLab","isFork":false,"description":"[CVPR 2024 Oral] InternVL Family: A Pioneering Open-Source Alternative to GPT-4o. 
接近GPT-4o表现的开源多模态对话模型","allTopics":["image-classification","gpt","multi-modal","semantic-segmentation","video-classification","image-text-retrieval","llm","vision-language-model","gpt-4v","vit-6b","vit-22b","gpt-4o"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":5,"issueCount":71,"starsCount":5419,"forksCount":422,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,7,1,3,6,5,8,0,3,6,6,5,0,1,1,0,1,1,9,11,6,14,7,7,4,0,0,0,0,3,11,27,16,5,0,1,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-20T05:07:46.932Z"}},{"type":"Public","name":"STM-Evaluation","owner":"OpenGVLab","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":69,"forksCount":6,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-19T17:17:00.120Z"}},{"type":"Public","name":".github","owner":"OpenGVLab","isFork":false,"description":"","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":1,"license":null,"participation":[0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,14,2,0,0,0,0,0,0,0,0,2,0,0,11,0,0,2,0,0,1,1,0,1,0,0,0,0,1,0,1,0,0,2,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-16T09:51:48.135Z"}},{"type":"Public","name":"MMIU","owner":"OpenGVLab","isFork":false,"description":"MMIU: Multimodal Multi-image Understanding for Evaluating Large Vision-Language Models","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":34,"forksCount":1,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,4,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-14T02:28:02.974Z"}},{"type":"Public","name":"PIIP","owner":"OpenGVLab","isFork":false,"description":"Parameter-Inverted Image Pyramid Networks (PIIP)","allTopics":["computer-vision","image-classification","object-detection","semantic-segmentation","instance-segmentation","vision-transformer"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":45,"forksCount":2,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,4,0,5,0,0,0,0,1,0,1,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-13T08:17:35.430Z"}},{"type":"Public","name":"all-seeing","owner":"OpenGVLab","isFork":false,"description":"[ICLR 2024 & ECCV 2024] The All-Seeing Projects: Towards Panoptic Visual Recognition&Understanding and General Relation Comprehension of the Open World\"","allTopics":["dataset","all-seeing","region-text"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":8,"starsCount":443,"forksCount":14,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,4,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,4,1,0,0,0,1,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-09T06:39:51.297Z"}},{"type":"Public","name":"Diffree","owner":"OpenGVLab","isFork":false,"description":"Diffree: Text-Guided Shape Free Object Inpainting with Diffusion Model","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":204,"forksCount":13,"license":"Apache License 
2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,5,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-06T02:24:30.287Z"}},{"type":"Public","name":"InternImage","owner":"OpenGVLab","isFork":false,"description":"[CVPR 2023 Highlight] InternImage: Exploring Large-Scale Vision Foundation Models with Deformable Convolutions","allTopics":["backbone","semantic-segmentation","deformable-convolution","foundation-model","object-detection"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":8,"issueCount":183,"starsCount":2472,"forksCount":231,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-01T07:03:16.643Z"}},{"type":"Public","name":"OmniQuant","owner":"OpenGVLab","isFork":false,"description":"[ICLR2024 spotlight] OmniQuant is a simple and powerful quantization technique for LLMs. ","allTopics":["quantization","large-language-models","llm"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":30,"starsCount":667,"forksCount":51,"license":"MIT License","participation":[8,1,2,0,0,0,0,2,2,0,0,2,0,6,0,8,0,0,7,0,2,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-24T10:51:17.263Z"}},{"type":"Public","name":"MMT-Bench","owner":"OpenGVLab","isFork":false,"description":"ICML'2024 | MMT-Bench: A Comprehensive Multimodal Benchmark for Evaluating Large Vision-Language Models Towards Multitask AGI","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":5,"starsCount":83,"forksCount":2,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-18T03:44:38.403Z"}},{"type":"Public","name":"ControlLLM","owner":"OpenGVLab","isFork":false,"description":"ControlLLM: Augment Language Models with Tools by Searching on Graphs","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":6,"starsCount":183,"forksCount":9,"license":null,"participation":[0,0,0,0,0,0,1,7,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-15T13:55:47.629Z"}},{"type":"Public","name":"GUI-Odyssey","owner":"OpenGVLab","isFork":false,"description":"GUI Odyssey is a comprehensive dataset for training and evaluating cross-app navigation agents. 
GUI Odyssey consists of 7,735 episodes from 6 mobile devices, spanning 6 types of cross-app tasks, 201 apps, and 1.4K app combos.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":55,"forksCount":2,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,10,0,11,7,1,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-10T07:10:11.142Z"}},{"type":"Public","name":"HumanBench","owner":"OpenGVLab","isFork":false,"description":"This repo is official implementation of HumanBench (CVPR2023)","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":15,"starsCount":229,"forksCount":9,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-08T06:43:47.366Z"}},{"type":"Public","name":"PhyBench","owner":"OpenGVLab","isFork":false,"description":"The official repo of PhyBench","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":17,"forksCount":1,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,0,5,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-07T12:08:42.348Z"}},{"type":"Public","name":"VideoMamba","owner":"OpenGVLab","isFork":false,"description":"[ECCV2024] VideoMamba: State Space Model for Efficient Video Understanding","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":33,"starsCount":774,"forksCount":59,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-06T11:07:17.146Z"}},{"type":"Public","name":"LORIS","owner":"OpenGVLab","isFork":false,"description":"Long-Term Rhythmic Video Soundtracker, ICML2023","allTopics":["music-generation","pytorch-implementation","multi-modality","diffusion-models","aigc"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":54,"forksCount":1,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-05T09:08:18.860Z"}},{"type":"Public","name":"EgoVideo","owner":"OpenGVLab","isFork":false,"description":"[CVPR 2024 Champions] Solutions for EgoVis Chanllenges in CVPR 2024","allTopics":[],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":3,"starsCount":100,"forksCount":3,"license":null,"participation":[0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,1,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-05T03:26:24.052Z"}},{"type":"Public","name":"MM-NIAH","owner":"OpenGVLab","isFork":false,"description":"This is the official implementation of the paper \"Needle In A Multimodal 
Haystack\"","allTopics":["benchmark","long-context","vision-language-model","multimodal-large-language-models"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":72,"forksCount":4,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,10,2,24,4,0,3,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-04T12:05:46.932Z"}},{"type":"Public","name":"VisionLLM","owner":"OpenGVLab","isFork":false,"description":"VisionLLM Series","allTopics":["object-detection","large-language-models","generalist-model"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":12,"starsCount":838,"forksCount":19,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-02T04:38:17.214Z"}},{"type":"Public","name":"MUTR","owner":"OpenGVLab","isFork":false,"description":"[AAAI 2024] Referred by Multi-Modality: A Unified Temporal Transformers for Video Object Segmentation","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":62,"forksCount":5,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-26T14:39:46.214Z"}},{"type":"Public","name":"Instruct2Act","owner":"OpenGVLab","isFork":false,"description":"Instruct2Act: Mapping Multi-modality Instructions to Robotic Actions with Large Language Model","allTopics":["robotics","clip","llm","chatgpt","segment-anything"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":322,"forksCount":20,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-23T07:39:41.391Z"}},{"type":"Public","name":"Hulk","owner":"OpenGVLab","isFork":false,"description":"An official implementation of \"Hulk: A Universal Knowledge Translator for Human-Centric Tasks\"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":8,"starsCount":83,"forksCount":4,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,1,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-19T09:03:17.777Z"}}],"repositoryCount":67,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"OpenGVLab repositories"}