{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"PALO","owner":"mbzuai-oryx","isFork":false,"description":"(WACV 2025) Vision-language conversation in 10 languages including English, Chinese, French, Spanish, Russian, Japanese, Arabic, Hindi, Bengali and Urdu.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":4,"starsCount":77,"forksCount":5,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-03T06:18:46.722Z"}},{"type":"Public","name":"Video-ChatGPT","owner":"mbzuai-oryx","isFork":false,"description":"[ACL 2024 🔥] Video-ChatGPT is a video conversation model capable of generating meaningful conversation about videos. It combines the capabilities of LLMs with a pretrained visual encoder adapted for spatiotemporal video representation. We also introduce a rigorous 'Quantitative Evaluation Benchmarking' for video-based conversational models.","allTopics":["chatbot","llama","clip","mulit-modal","vision-language","vicuna","gpt-4","vision-language-pretraining","llava","video-chatboat","video-conversation"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":17,"starsCount":1146,"forksCount":98,"license":"Creative Commons Attribution 4.0 International","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-27T05:58:28.262Z"}},{"type":"Public","name":"CVRR-Evaluation-Suite","owner":"mbzuai-oryx","isFork":false,"description":"Official repository of paper titled \"How Good is my Video LMM? Complex Video Reasoning and Robustness Evaluation Suite for Video-LMMs\".","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":39,"forksCount":2,"license":"Creative Commons Attribution 4.0 International","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-23T08:22:18.411Z"}},{"type":"Public","name":"BiMediX","owner":"mbzuai-oryx","isFork":false,"description":"Bilingual Medical Mixture of Experts LLM","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":1,"starsCount":24,"forksCount":1,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-15T08:01:36.420Z"}},{"type":"Public","name":"BiMediX2","owner":"mbzuai-oryx","isFork":false,"description":"Bio-Medical EXpert LMM with English and Arabic Language Capabilities","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-12T11:28:44.944Z"}},{"type":"Public","name":"VideoGPT-plus","owner":"mbzuai-oryx","isFork":false,"description":"Official Repository of paper VideoGPT+: Integrating Image and Video Encoders for Enhanced Video Understanding","allTopics":["chatbot","clip","image-encoder","video-encoder","multimodal","dual-encoder","vision-language","vicuna","gpt4","vision-language-pretraining","llava","video-conversation","video-chatbot","llama3","gpt4o","phi-3-mini"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":15,"starsCount":183,"forksCount":11,"license":"Creative Commons Attribution 4.0 International","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-11T16:24:43.173Z"}},{"type":"Public","name":"XrayGPT","owner":"mbzuai-oryx","isFork":false,"description":"[BIONLP@ACL 2024] XrayGPT: Chest Radiographs Summarization using Medical Vision-Language Models.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":16,"starsCount":459,"forksCount":52,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-08T06:31:22.529Z"}},{"type":"Public","name":"GeoChat","owner":"mbzuai-oryx","isFork":false,"description":"[CVPR 2024 🔥] GeoChat, the first grounded Large Vision Language Model for Remote Sensing","allTopics":["remote-sensing","vlm"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":30,"starsCount":404,"forksCount":29,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-25T19:16:49.137Z"}},{"type":"Public","name":"LLaVA-pp","owner":"mbzuai-oryx","isFork":false,"description":"🔥🔥 LLaVA++: Extending LLaVA with Phi-3 and LLaMA-3 (LLaVA LLaMA-3, LLaVA Phi-3)","allTopics":["conversation","lmms","vision-language","llm","llava","llama3","phi3","llava-llama3","llava-phi3","llama3-llava","phi3-llava","llama-3-vision","phi3-vision","llama-3-llava","phi-3-llava","llama3-vision","phi-3-vision"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":15,"starsCount":789,"forksCount":57,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-10T06:23:08.829Z"}},{"type":"Public","name":"groundingLMM","owner":"mbzuai-oryx","isFork":false,"description":"[CVPR 2024 🔥] Grounding Large Multimodal Model (GLaMM), the first-of-its-kind model capable of generating natural language responses that are seamlessly integrated with object segmentation masks.","allTopics":["vision-and-language","lmm","foundation-models","vision-language-model","llm-agent"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":18,"starsCount":738,"forksCount":37,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-02T01:07:18.620Z"}},{"type":"Public","name":"MobiLlama","owner":"mbzuai-oryx","isFork":false,"description":"MobiLlama : Small Language Model tailored for edge devices","allTopics":["slm","llm","efficient-llm","mobile-llm","tiny-llm"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":13,"starsCount":579,"forksCount":42,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-03T22:04:01.419Z"}},{"type":"Public","name":"ClimateGPT","owner":"mbzuai-oryx","isFork":false,"description":"[EMNLP'23] ClimateGPT: a specialized LLM for conversations related to Climate Change and Sustainability topics in both English and Arabic languages.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":73,"forksCount":9,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-01-30T17:04:52.501Z"}},{"type":"Public","name":"Video-LLaVA","owner":"mbzuai-oryx","isFork":false,"description":"PG-Video-LLaVA: Pixel Grounding in Large Multimodal Video Models","allTopics":["video","transcription","lmm","grounding","video-grounding","llm","video-conversation"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":14,"starsCount":233,"forksCount":11,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-01-02T17:51:01.418Z"}},{"type":"Public","name":"Awesome-CV-Foundational-Models","owner":"mbzuai-oryx","isFork":true,"description":"","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":7,"forksCount":26,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-07-31T11:27:15.148Z"}}],"repositoryCount":14,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"mbzuai-oryx repositories"}