-
Notifications
You must be signed in to change notification settings - Fork 0
/
watermarking.py
199 lines (158 loc) · 6.07 KB
/
watermarking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# %% Environment check
# torch is only used in this cell, to report whether CUDA is visible; the
# `datasets` names imported here are used by the later cells of this script.
import torch
from datasets import Dataset, DatasetDict, Features, load_dataset

# NOTE(review): the images are probably not rescaled to values between 0 and 1
# anywhere in this script -- confirm before feeding them to a model.
print(f"Torch cuda available: {torch.cuda.is_available()}")
print(f"Torch cuda version {torch.version.cuda}")

# %% Load the dataset
dataset = load_dataset("nelorth/oxford-flowers")

# %% See an image
import matplotlib.pyplot as plt

image_idx = 600  # which training image to preview (original note: 7169 available)
sample = dataset["train"][image_idx]
image = sample["image"]
label = sample["label"]  # classification label; only shown in the plot title

# Show the picture without axis numbers and ticks.
plt.imshow(image)
plt.title(f"Label: {label}")
plt.axis("off")
plt.show()

# %%
# Size, in pixels, that every image is resized to for the model.
width = 500
height = 500
# %%
from PIL import Image, ImageDraw, ImageFont
from PIL.Image import Resampling
def create_watermark(
    text="Watermark",
    font_path="arial.ttf",
    font_size=40,
    opacity=128,
    rotation=0,
    width=width,
    height=height,
):
    """Render ``text`` centred on a fully transparent RGBA canvas.

    Args:
        text: String to draw.
        font_path: TrueType font file handed to ``ImageFont.truetype``.
        font_size: Font size handed to ``ImageFont.truetype``.
        opacity: Alpha value of the white text fill, from 0 (transparent)
            to 255 (opaque).
        rotation: Angle in degrees (counter-clockwise) by which the finished
            canvas is rotated.
        width: Canvas width in pixels. The default is the value the
            module-level ``width`` had when this function was defined.
        height: Canvas height in pixels. The default is the value the
            module-level ``height`` had when this function was defined.

    Returns:
        A ``width`` x ``height`` RGBA image that contains only the text.

    Raises:
        OSError: If ``ImageFont.truetype`` cannot read ``font_path``.
    """
    watermark = Image.new("RGBA", (width, height), (0, 0, 0, 0))
    draw = ImageDraw.Draw(watermark)
    font = ImageFont.truetype(font_path, font_size)

    # textbbox() reports where the ink lands when the text is drawn at the
    # origin. Its left/top edges are generally not 0, so they have to be
    # subtracted; otherwise the visible text ends up shifted off-centre.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    x = (width - (right - left)) / 2 - left
    y = (height - (bottom - top)) / 2 - top
    draw.text((x, y), text, font=font, fill=(255, 255, 255, opacity))

    # expand=False keeps the canvas at (width, height), so no resize is needed
    # afterwards; rotate() already returns a new image.
    return watermark.rotate(rotation, expand=False)
# %%
def apply_watermark(
    image, watermark, position=(0, 0), save_path="watermarked_image.jpg"
):
    """Overlay ``watermark`` on ``image`` and return the result as RGB.

    Args:
        image: PIL image to watermark. It is resized to the module-level
            ``(width, height)`` before compositing; the caller's object is
            not modified.
        watermark: PIL image used as the overlay. Its own alpha channel is
            used as the paste mask; it is converted to RGBA if necessary.
        position: ``(x, y)`` offset at which the watermark is pasted onto a
            transparent layer of the same size as the resized image.
        save_path: File the RGB result is written to. Defaults to
            ``"watermarked_image.jpg"``, which preserves the previous
            behaviour of always writing that file. Pass ``None`` to skip the
            disk write, e.g. when calling this once per dataset item.

    Returns:
        The watermarked image in RGB mode.
    """
    # Bring the input to the working size and give it an alpha channel so it
    # can be alpha-composited.
    base = image.resize((width, height), Resampling.LANCZOS).convert("RGBA")

    if watermark.mode != "RGBA":
        watermark = watermark.convert("RGBA")

    # Paste onto a transparent layer first: alpha_composite() needs two
    # images of identical size, whatever the paste offset is.
    layer = Image.new("RGBA", base.size, (0, 0, 0, 0))
    layer.paste(watermark, position, watermark)

    # Flatten to RGB because JPEG has no alpha channel.
    rgb_image = Image.alpha_composite(base, layer).convert("RGB")
    if save_path is not None:
        rgb_image.save(save_path)
    return rgb_image
# %% See the watermarked image
# Appearance of the watermark text.
text = "transcendingvictor"
font_path = "arial.ttf"  # TrueType font file name handed to create_watermark
font_size = 60
opacity = 210  # 0 (transparent) to 255 (opaque)
rotation = 0  # degrees anticlockwise; 0 leaves the text horizontal

# Offset at which the watermark canvas is pasted onto the image. The text is
# drawn in the middle of that canvas, so (0, 0) keeps it in the middle of the
# image (original note: limits are -250 to 250).
position = (0, 0)

image = dataset["train"][image_idx]["image"]
watermark = create_watermark(
    text, font_path, font_size, opacity, rotation, width, height
)
apply_watermark(image, watermark, position=position)
# %% (3mins) New training and testing dataset with the above watermark
from tqdm.auto import tqdm


def _build_image_pairs(split, watermark, position):
    """Return ``(originals, watermarked)`` lists of PIL images for ``split``.

    Every item's ``"image"`` is converted to RGB and resized to the
    module-level ``(width, height)``; that resized image is stored as the
    "original" and also passed to ``apply_watermark`` to produce its
    watermarked counterpart. The PIL objects are stored directly (they are
    not converted to bytes). Both lists follow the iteration order of
    ``split``.
    """
    originals = []
    watermarked = []
    for item in tqdm(split):
        resized = (
            item["image"].convert("RGB").resize((width, height), Resampling.LANCZOS)
        )
        originals.append(resized)
        watermarked.append(apply_watermark(resized, watermark, position=position))
    return originals, watermarked


original_images_train, watermarked_images_train = _build_image_pairs(
    dataset["train"], watermark, position
)
original_images_test, watermarked_images_test = _build_image_pairs(
    dataset["test"], watermark, position
)
# %% Inspect the lists to check that the images were stored.
image_index = 3422

# Show the watermarked image first, then the resized original at the same index.
previews = (
    (watermarked_images_train, "Watermarked"),
    (original_images_train, "Original (reshaped)"),
)
for images, label in previews:
    image = images[image_index]
    plt.imshow(image)
    plt.title(f"Label: {label}")
    plt.axis("off")
    plt.show()
# %%
# Building the datasets from in-memory (non-serialized) PIL images is slow --
# see the author's timings on the cells below -- so plan ahead before re-running.
# NOTE(review): the original author reported that the images sometimes do not
# show up in the Hugging Face viewer; unclear whether this path is worth it.

# Imported under an alias: a plain `from datasets import Image` would rebind
# the module-level `Image` that create_watermark / apply_watermark rely on
# (PIL's Image module) and break them if they are run again afterwards.
from datasets import Image as HFImage

# Assumes the original_images_* and watermarked_images_* lists are already populated.
# Declaring the column as an image feature explicitly, rather than leaving it
# to type inference.
features = Features({"image": HFImage()})

# Convert lists of images to datasets
# %% takes around 10 mins
train_dataset_original = Dataset.from_dict(
    {"image": original_images_train}, features=features
)
# %% takes around 3 mins
test_dataset_original = Dataset.from_dict(
    {"image": original_images_test}, features=features
)
# %% takes around 10 mins
train_dataset_watermark = Dataset.from_dict(
    {"image": watermarked_images_train}, features=features
)
# %% takes around 3 mins
test_dataset_watermark = Dataset.from_dict(
    {"image": watermarked_images_test}, features=features
)
# %% takes around 6 mins
# Combine the splits into one DatasetDict per variant
dataset_dict_original = DatasetDict(
    {"train": train_dataset_original, "test": test_dataset_original}
)
# %% takes around 6 mins
dataset_dict_watermarked = DatasetDict(
    {"train": train_dataset_watermark, "test": test_dataset_watermark}
)
# Authenticate with Hugging Face before running this cell: either export an
# access token as the HF_TOKEN environment variable, or log in once via the
# CLI / `huggingface_hub.notebook_login()` so that a token is cached locally.
# %% takes around 7 mins
# Upload the datasets to the Hugging Face Hub
import os

# SECURITY: an access token used to be hard-coded in this cell. Anything that
# was committed to version control must be treated as leaked and revoked in
# the Hugging Face account settings. Credentials are now read from the
# environment; when HF_TOKEN is unset, None is passed and push_to_hub falls
# back to the locally cached login.
hf_token = os.environ.get("HF_TOKEN")

dataset_dict_original.push_to_hub(
    repo_id="transcendingvictor/original_flowers_dataset",
    token=hf_token,
)
dataset_dict_watermarked.push_to_hub(
    repo_id="transcendingvictor/watermark1_flowers_dataset",
    token=hf_token,
)
# %%