Batch Delete Image Tags In Harbor

Summary

频繁的 CI/CD 会使 Harbor 中的镜像累积大量 tag,有的镜像甚至多达上千个 tag,因此需要采取措施进行清理。

批量删除 Harbor 镜像 tag 的脚本支持两种方式:

1) 保留数值最大的指定数目的 tag(要求所有 tag 均为纯数字)

2) 保留指定日期及之后创建的所有 tag(tag 格式不限),删除该日期之前创建的 tag

Script

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/python
# Written by itwye

'''
This script supports two methods for deleting tags of an image repo in Harbor.

1) Keep the specified number of tags, which requires the format of the tags to be numeric.
2) Keep tags after the specified date, this allows tags to be in any format.

Usage:

1) python batch_del_image_tags.py --repo myrepo/busybox --delmodel number --number 30
2) python batch_del_image_tags.py --repo myrepo/busybox --delmodel date --date 2019-08-08T08:00:00

'''

import re
import os
import sys
import json
import time
import click
import base64
import requests
import datetime

def http_request(url, method, auth, payload=None, expect_code=200, timeout=30):
    """Send an HTTP request with a Basic auth header, trying up to three times.

    An attempt is repeated when the response status differs from
    ``expect_code`` or when ``requests`` raises a transport-level error
    (connection failure, timeout, ...).

    Args:
        url: Full URL to call.
        method: HTTP verb handed to ``requests.request`` (e.g. "GET").
        auth: Credential string placed verbatim after "Basic " in the
            ``authorization`` header.
        payload: Optional request body, sent as ``data``.
        expect_code: Status code that counts as success and stops retrying.
        timeout: Per-attempt timeout handed to ``requests.request``.

    Returns:
        The response of the last attempt. Its status may still differ from
        ``expect_code`` when every attempt failed, so callers should check
        ``status_code``.

    Raises:
        requests.exceptions.RequestException: if the final attempt fails at
            the transport level.
    """
    max_attempts = 3
    headers = {
        'Content-Type': "application/json",
        'authorization': "Basic %s" % auth
    }

    response = None
    for attempt in range(1, max_attempts + 1):
        try:
            response = requests.request(method, url, data=payload,
                                        headers=headers, timeout=timeout)
        except requests.exceptions.RequestException:
            # A transient network error deserves the same retries as a bad
            # status code; only give up once the attempts are used up.
            if attempt == max_attempts:
                raise
            continue
        if response.status_code == expect_code:
            break

    return response


def cal_timestamp(date):
    """Convert a ``YYYY-MM-DDTHH:MM:SS`` string into an integer timestamp.

    The parsed time is passed through ``time.mktime``, i.e. it is
    interpreted as local time.

    Args:
        date: Date string such as "2020-08-08T18:08:00".

    Returns:
        The timestamp in whole seconds as an ``int``.

    Exits:
        Prints an error message and terminates the process with status 1
        when ``date`` does not match the expected format.
    """
    try:
        time_array = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
    except ValueError:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s Date format error, Date example as 2020-08-08T18:08:00, Quit!" % date)
        sys.exit(1)

    return int(time.mktime(time_array))


def get_tags_before_specified_date(repo, date):
    """Return the names of the tags of ``repo`` created before ``date``.

    The tag list is fetched from ``<harbor_api_addr>/repositories/<repo>/tags``.
    Both ``date`` and each tag's ``created`` field are converted with
    ``cal_timestamp`` and then compared.

    Args:
        repo: Repository name, e.g. "myrepo/busybox".
        date: Cut-off date in ``YYYY-MM-DDTHH:MM:SS`` format.

    Returns:
        A list of tag names whose creation time is strictly earlier than
        ``date``.

    Exits:
        Terminates with status 1 when the tag list cannot be fetched or when
        a date cannot be parsed by ``cal_timestamp``.
    """
    mid_timestamp = cal_timestamp(date)

    get_tags_endpoint = harbor_api_addr + "/repositories/%s/tags" % repo
    response = http_request(get_tags_endpoint, "GET", auth, None)
    if response.status_code != 200:
        # Without this check an error body would be parsed as if it were the
        # tag list and fail later with a confusing exception.
        print("Get tags of %s failed, ReturnCode: %s, Quit!" % (repo, response.status_code))
        sys.exit(1)

    # A JSON ``null`` body is treated like an empty tag list.
    tags_metadata = json.loads(response.text) or []

    need_del_tags = []
    for tag_metadata in tags_metadata:
        # Cut the value down to whole seconds: drop a fractional part
        # ("...:00.123456Z") and also a bare trailing "Z" ("...:00Z"), which
        # would otherwise make cal_timestamp abort the whole run.
        created = tag_metadata["created"].split(".")[0].rstrip("Z")
        if cal_timestamp(created) < mid_timestamp:
            need_del_tags.append(tag_metadata["name"])

    return need_del_tags


def get_tags_except_specified_number(repo, keep_tags_total_number):
    """Return the tags of ``repo`` to delete so that only the highest remain.

    All tags must be numeric. They are ordered by their integer value and
    everything except the ``keep_tags_total_number`` largest ones is returned.

    Args:
        repo: Repository name, e.g. "myrepo/busybox".
        keep_tags_total_number: How many of the highest-numbered tags to keep.

    Returns:
        A list with the original tag names (not their integer values, so a
        tag such as "007" is still addressed as "007") in ascending numeric
        order.

    Exits:
        Status 1 when the tag list cannot be fetched or a tag is not numeric;
        status 0 when there are not more tags than the number to keep.
    """
    get_tags_endpoint = harbor_api_addr + "/repositories/%s/tags" % repo
    response = http_request(get_tags_endpoint, "GET", auth, None)
    if response.status_code != 200:
        # Do not try to interpret an error body as a tag list.
        print("Get tags of %s failed, ReturnCode: %s, Quit!" % (repo, response.status_code))
        sys.exit(1)

    # A JSON ``null`` body is treated like an empty tag list.
    tags_metadata = json.loads(response.text) or []

    numbered_tags = []
    for tag_metadata in tags_metadata:
        name = tag_metadata["name"]
        try:
            # Keep the original name next to its numeric value: the number is
            # only the sort key, the name is what must be sent to the API.
            numbered_tags.append((int(name), name))
        except ValueError:
            print("Have tag format type is not number, Quit!")
            sys.exit(1)

    if keep_tags_total_number >= len(numbered_tags):
        print("The total number of tags to be reserved is greater than the total number of existing tags, Will not delete any tags, Quit!")
        sys.exit(0)

    numbered_tags.sort()
    surplus = len(numbered_tags) - keep_tags_total_number
    return [name for _, name in numbered_tags[:surplus]]

def delete_tags(repo, tags):
    """Delete each of ``tags`` from ``repo`` and print the result per tag.

    One DELETE request is sent to
    ``<harbor_api_addr>/repositories/<repo>/tags/<tag>`` for every tag, and a
    line with the tag and the returned HTTP status code is printed.

    Args:
        repo: Repository name, e.g. "myrepo/busybox".
        tags: Iterable of tag names to delete.
    """
    for tag in tags:
        del_tag_endpoint = harbor_api_addr + "/repositories/%s/tags/%s" % (repo, tag)
        response = http_request(del_tag_endpoint, "DELETE", auth)
        # One pre-formatted string prints the same line on Python 2 and 3.
        print("Delete: %s:%s ReturnCode: %s" % (repo, tag, response.status_code))

def help(debug):
    """Report a command-line usage error and terminate with status 1.

    Args:
        debug: Marker printed in front of the message; the callers in this
            file pass a number identifying which check failed.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("%s Parameter setting error, Please reference --help option." % debug)
    sys.exit(1)

@click.command()
@click.option('--repo', help='harbor image repo')
@click.option('--delmodel' ,help='Support delete model of date and number')
@click.option('--date', help='Tags before this date will be deleted')
@click.option('--number', help='Keep the specified number of tags,which requires the format of the tags to be numeric')
def main(repo, delmodel, date, number):
    """
    Usage: \n
    1) python batch_del_image_tags.py --repo myrepo/busybox --delmodel date --date 2019-08-08T08:00:00 \n

    2) python batch_del_image_tags.py --repo myrepo/busybox --delmodel number --number 30 \n
    """
    # The docstring above is the text click shows for --help, so it is kept
    # as user-facing usage text rather than developer documentation.
    #
    # Flow: validate the options, compute the list of tags to delete for the
    # chosen mode, print that list, then delete the tags. Every validation
    # failure goes through help(), which prints an error and exits; the
    # number passed to it identifies the failed check.
    if repo is None or delmodel is None:
        return help(1)
    if delmodel not in ("date", "number"):
        return help(6)

    if delmodel == "date":
        if date is None:
            return help(2)
        if not re.match(r"(\d{4}-\d{1,2}-\d{1,2}T\d{1,2}:\d{1,2}:\d{1,2}$)", date):
            return help(3)
        need_del_tags = get_tags_before_specified_date(repo, date)
    else:
        if number is None:
            return help(4)
        try:
            keep_count = int(number)
        except ValueError:
            # A non-numeric --number is a usage error, not a crash.
            return help(5)
        if keep_count < 0:
            # A negative count would make the slice of tags to delete cover
            # every tag, so refuse it instead of wiping the repository.
            return help(5)
        need_del_tags = get_tags_except_specified_number(repo, keep_count)

    print(need_del_tags)
    delete_tags(repo, need_del_tags)

if __name__ == "__main__":
    # Base URL of the Harbor API; the functions above read it as a module
    # global when building endpoint URLs.
    harbor_api_addr = "https://xxx.xxx.xxx/api"

    # Credential sent after "Basic " in the authorization header, i.e. the
    # base64 encoding of "user:pwd" (for example base64.b64encode("user:pwd")).
    # Also read as a module global by the functions above.
    auth = "xxxxxxxxxx"

    main()