Any-to-Any
Transformers
Diffusers
Safetensors
English
llada2_moe
feature-extraction
multimodal
image-generation
image-understanding
image-editing
diffusion
Mixture of Experts
text-to-image
custom_code
Instructions to use inclusionAI/LLaDA2.0-Uni with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use inclusionAI/LLaDA2.0-Uni with Transformers:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("inclusionAI/LLaDA2.0-Uni", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
Update modeling_llada2uni_moe.py
Browse files
modeling_llada2uni_moe.py
CHANGED
|
@@ -2419,9 +2419,12 @@ class LLaDA2MoeModelLM(LLaDA2MoePreTrainedModel, GenerationMixin):
|
|
| 2419 |
tok = self._get_tokenizer(tokenizer)
|
| 2420 |
sp = self._get_special_tokens(tok, image_h, image_w)
|
| 2421 |
|
| 2422 |
-
|
| 2423 |
-
|
| 2424 |
-
|
|
|
|
|
|
|
|
|
|
| 2425 |
|
| 2426 |
out = self.generate_bd(
|
| 2427 |
data={"input_ids": torch.tensor(ids).unsqueeze(0).to(self.device)},
|
|
@@ -2433,7 +2436,7 @@ class LLaDA2MoeModelLM(LLaDA2MoePreTrainedModel, GenerationMixin):
|
|
| 2433 |
image_keep_ratio=image_keep_ratio, text_keep_ratio=text_keep_ratio,
|
| 2434 |
show_progress=False,
|
| 2435 |
)
|
| 2436 |
-
return tok.decode(out[0][len(ids)
|
| 2437 |
|
| 2438 |
@torch.no_grad()
|
| 2439 |
def edit_image(self, image_tokens, image_h, image_w, instruction,
|
|
|
|
| 2419 |
tok = self._get_tokenizer(tokenizer)
|
| 2420 |
sp = self._get_special_tokens(tok, image_h, image_w)
|
| 2421 |
|
| 2422 |
+
user = self._build_image_header(sp) + image_tokens + sp["eoi"] \
|
| 2423 |
+
+ tok("\n").input_ids + (tok(question).input_ids if question else [])
|
| 2424 |
+
sys_ids, user_ids, asst_ids = self._build_chat(
|
| 2425 |
+
tok, "You are a multimodal understanding assistant.", user,
|
| 2426 |
+
)
|
| 2427 |
+
ids = sys_ids + user_ids + asst_ids
|
| 2428 |
|
| 2429 |
out = self.generate_bd(
|
| 2430 |
data={"input_ids": torch.tensor(ids).unsqueeze(0).to(self.device)},
|
|
|
|
| 2436 |
image_keep_ratio=image_keep_ratio, text_keep_ratio=text_keep_ratio,
|
| 2437 |
show_progress=False,
|
| 2438 |
)
|
| 2439 |
+
return tok.decode(out[0][len(ids):], skip_special_tokens=True)
|
| 2440 |
|
| 2441 |
@torch.no_grad()
|
| 2442 |
def edit_image(self, image_tokens, image_h, image_w, instruction,
|