Media Gen Solution > Supported image utilities

Outpainting

Extend beyond the canvas of an existing image.

Outpainting is the process of using an image generation model like Stable Diffusion to extend beyond the canvas of an existing image. Outpainting is very similar to inpainting, but instead of generating a region within an existing image, the model generates a region outside of it.

Init Image

Output Image

Outpainting works with both SDXL and SD1.5. At a high level, outpainting works like this:

Choose an existing image you’d like to outpaint.
Create a source image that places your original image within a larger canvas.
Create a black and white mask image.
Use init_image (source image), mask_image (your mask image), a text prompt, and the outpainting parameter as inputs to the Image Gen API to generate a new image.

In the following example we will leverage the SD1.5 engine so we’ll start with a 512x512 image.

Init Image (512X512)

You can extend the image in any direction, but for this example we’ll extend the width from 512 → 768. Supported resolutions for SD1.5 are:

(W, H): (768, 576), (1024, 576), (640, 512), (384, 704), (640, 768), (640, 640), (1024, 768), (1536, 1024), (768, 1024), (576, 448), (1024, 1024), (896, 896), (704, 1216), (512, 512), (448, 576), (832, 512), (512, 704), (576, 768), (1216, 704), (512, 768), (512, 832), (1024, 1536), (576, 1024), (704, 384), (768, 512)

Supported resolutions for SDXL are:

(W, H): (1536, 640), (768, 1344), (832, 1216), (1344, 768), (1152, 896), (640, 1536), (1216, 832), (896, 1152), (1024, 1024)

The next step requires two images, an init_image and a mask_image, in order to outpaint to a width of 768 pixels. Convert both images to base64.

Init Image (768X512)

Mask Image

Example Code for Outpainting:

Python

import requests
import json
import os
import base64
import time
import io
import PIL.Image


def _encode_jpeg_b64(path):
    """Return the image at *path* re-encoded as JPEG, as a base64 string."""
    # The context manager closes the source file once the pixels have been
    # written to the in-memory buffer.
    with PIL.Image.open(path) as image:
        buffer = io.BytesIO()
        image.save(buffer, format='JPEG')
    return base64.b64encode(buffer.getvalue()).decode('utf-8')


def _process_test(url):
    """Send an outpainting request to *url* and save the returned images.

    Reads "init-image-outpainting.jpg" and "mask-outpainting.jpg" from the
    current working directory, posts them (base64-encoded) together with the
    prompt and generation settings, and writes every image in the response to
    "result_image<i>.jpg".

    Args:
        url: Endpoint that receives the JSON generation request.

    Raises:
        RuntimeError: If the OCTOAI_TOKEN environment variable is not set.
    """
    # Fail early instead of sending "Bearer None" and getting an opaque
    # authentication error back from the service.
    octoai_token = os.environ.get("OCTOAI_TOKEN")
    if not octoai_token:
        raise RuntimeError("OCTOAI_TOKEN environment variable is not set")

    encoded_image = _encode_jpeg_b64("init-image-outpainting.jpg")
    encoded_mask = _encode_jpeg_b64("mask-outpainting.jpg")

    payload = {
        "prompt": "portrait view, cartoon sketch of a woman, green grass with fall foliage in the back, blurry background, blue and grey sky",
        "negative_prompt": "path, leaves on the ground, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, missing arms, poorly drawn face, bad face, fused face, cloned face, worst face, three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, fused thigh, extra thigh, worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, horn, extra eyes, 2girl, amputation",
        "width": 768,
        "height": 512,
        "num_images": 1,
        "sampler": "DDPM",
        "steps": 40,
        "cfg_scale": 12,
        "style_preset": "base",
        "outpainting": "true",
        "init_image": encoded_image,
        "mask_image": encoded_mask,
        "strength": 1
    }
    headers = {
        "Authorization": f"Bearer {octoai_token}",
        "Content-Type": "application/json",
        "X-OctoAI-Queue-Dispatch": "true"
    }

    response = requests.post(url, headers=headers, json=payload)

    # An error response carries no "images" entry to decode, so report it
    # and stop rather than failing later with a secondary error.
    if response.status_code != 200:
        print(response.text)
        return

    # Parse the body once and reuse it for both logging and decoding.
    result = response.json()
    print(result)

    for i, img_info in enumerate(result["images"]):
        img_bytes = base64.b64decode(img_info["image_b64"])
        img = PIL.Image.open(io.BytesIO(img_bytes))
        img.load()
        img.save(f"result_image{i}.jpg")


if __name__ == "__main__":
    _process_test("https://image.octoai.run/generate/sd")

Here’s the outpainted output that the above code generates:

Output image

import requests
import json
import os
import base64
import time
import io
import PIL.Image


def _encode_jpeg_b64(path):
    """Return the image at *path* re-encoded as JPEG, as a base64 string."""
    # The context manager closes the source file once the pixels have been
    # written to the in-memory buffer.
    with PIL.Image.open(path) as image:
        buffer = io.BytesIO()
        image.save(buffer, format='JPEG')
    return base64.b64encode(buffer.getvalue()).decode('utf-8')


def _process_test(url):
    """Send an outpainting request to *url* and save the returned images.

    Reads "init-image-outpainting.jpg" and "mask-outpainting.jpg" from the
    current working directory, posts them (base64-encoded) together with the
    prompt and generation settings, and writes every image in the response to
    "result_image<i>.jpg".

    Args:
        url: Endpoint that receives the JSON generation request.

    Raises:
        RuntimeError: If the OCTOAI_TOKEN environment variable is not set.
    """
    # Fail early instead of sending "Bearer None" and getting an opaque
    # authentication error back from the service.
    octoai_token = os.environ.get("OCTOAI_TOKEN")
    if not octoai_token:
        raise RuntimeError("OCTOAI_TOKEN environment variable is not set")

    encoded_image = _encode_jpeg_b64("init-image-outpainting.jpg")
    encoded_mask = _encode_jpeg_b64("mask-outpainting.jpg")

    payload = {
        "prompt": "portrait view, cartoon sketch of a woman, green grass with fall foliage in the back, blurry background, blue and grey sky",
        "negative_prompt": "path, leaves on the ground, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, missing arms, poorly drawn face, bad face, fused face, cloned face, worst face, three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, fused thigh, extra thigh, worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, horn, extra eyes, 2girl, amputation",
        "width": 768,
        "height": 512,
        "num_images": 1,
        "sampler": "DDPM",
        "steps": 40,
        "cfg_scale": 12,
        "style_preset": "base",
        "outpainting": "true",
        "init_image": encoded_image,
        "mask_image": encoded_mask,
        "strength": 1
    }
    headers = {
        "Authorization": f"Bearer {octoai_token}",
        "Content-Type": "application/json",
        "X-OctoAI-Queue-Dispatch": "true"
    }

    response = requests.post(url, headers=headers, json=payload)

    # An error response carries no "images" entry to decode, so report it
    # and stop rather than failing later with a secondary error.
    if response.status_code != 200:
        print(response.text)
        return

    # Parse the body once and reuse it for both logging and decoding.
    result = response.json()
    print(result)

    for i, img_info in enumerate(result["images"]):
        img_bytes = base64.b64decode(img_info["image_b64"])
        img = PIL.Image.open(io.BytesIO(img_bytes))
        img.load()
        img.save(f"result_image{i}.jpg")


if __name__ == "__main__":
    _process_test("https://image.octoai.run/generate/sd")