J'ai écrit une fonction pour copier des objets d'un emplacement d'un compartiment S3 vers un autre emplacement du même compartiment (le code est en grande partie repris de s3cmd). Comment trouver toutes les permissions IAM nécessaires pour exécuter cette fonction ?
import sys
import time
from copy import copy
import logging
from S3.Exceptions import *
from S3.S3 import S3
from S3.Config import Config
from S3.FileDict import FileDict
from S3.S3Uri import S3Uri
from S3.Utils import *
from S3.FileLists import *
from S3.ExitCodes import EX_OK
# Module-level logger; uses the root logger to match the original s3cmd code.
LOG = logging.getLogger()
# Unfortunately the s3cmd implementation uses a global instance of config object
# everywhere
cfg = None  # S3.Config.Config instance; must be populated via init_s3_cfg() before any S3 call
def init_s3_cfg(access_key, secret_key):
    """Initialise the module-wide s3cmd Config object with the given AWS credentials.

    Must be called once before cmd_sync_remote2remote(), because the s3cmd
    code reads its configuration from this module-level global.
    """
    global cfg
    cfg = Config(secret_key=secret_key, access_key=access_key)
def cmd_sync_remote2remote(str_from_path, str_destination_base):
    '''
    Recursively copy (sync) objects from one S3 location to another,
    entirely server-side.

    This function is adopted from the s3cmd project
    https://github.com/s3tools/s3cmd because boto does not support
    recursive copy out of the box.

    :param str_from_path: source s3://bucket/prefix URI to copy from
    :param str_destination_base: destination s3://bucket/prefix URI to copy to
    :return: EX_OK when the sync completes
    '''
    s3 = S3(cfg)
    LOG.info(s3.config.bucket_location)

    # Normalise s3://uri (e.g. assert trailing slash)
    from_path = S3Uri(str_from_path).uri()
    destination_base = S3Uri(str_destination_base).uri()
    LOG.info("from %s to %s" % (from_path, destination_base))

    # NOTE(review): fetch_remote_list enumerates bucket contents, which
    # requires the s3:ListBucket permission on the bucket ARN itself
    # (arn:aws:s3:::bucket), not on bucket/* -- a policy granting only
    # object-level actions will fail here with Access Denied.
    src_list, src_exclude_list = fetch_remote_list(
        s3, from_path, recursive=True, require_attribs=True)
    dst_list, dst_exclude_list = fetch_remote_list(
        s3, destination_base, recursive=True, require_attribs=True)

    src_count = len(src_list)
    dst_count = len(dst_list)
    LOG.info(u"Found %d source files, %d destination files" %
             (src_count, dst_count))

    src_list, dst_list, update_list, copy_pairs = compare_filelists(
        src_list, dst_list, src_remote=True, dst_remote=True)
    src_count = len(src_list)
    update_count = len(update_list)
    dst_count = len(dst_list)
    LOG.info(u"Summary: %d source files to copy, %d files at destination to delete"
             % (src_count, dst_count))

    # Populate 'target_uri' only if we've got something to sync from src to dst
    for key in src_list:
        src_list[key]['target_uri'] = destination_base + key
    for key in update_list:
        update_list[key]['target_uri'] = destination_base + key

    def _upload(upload_list, seq, total):
        # Server-side copy of every entry in upload_list; returns the updated
        # sequence counter so numbering continues across successive calls.
        # sorted() replaces keys()+.sort(), which breaks on Python 3
        # (dict_keys has no .sort); 'name' avoids shadowing the 'file' builtin.
        for name in sorted(upload_list.keys()):
            seq += 1
            item = upload_list[name]
            src_uri = S3Uri(item['object_uri_str'])
            dst_uri = S3Uri(item['target_uri'])
            extra_headers = copy(cfg.extra_headers)
            try:
                _response = s3.object_copy(src_uri, dst_uri, extra_headers)
                LOG.info("File %(src)s copied to %(dst)s" % { "src" : src_uri, "dst" : dst_uri })
            except S3Error as e:  # 'as' syntax is valid on Python 2.6+ and 3.x
                LOG.error("File %(src)s could not be copied: %(e)s" % { "src" : src_uri, "e" : e })
        return seq

    # Perform the synchronization of files
    timestamp_start = time.time()
    seq = 0
    seq = _upload(src_list, seq, src_count + update_count)
    seq = _upload(update_list, seq, src_count + update_count)
    n_copied, bytes_saved, failed_copy_files = remote_copy(s3, copy_pairs, destination_base)

    # Process files not copied
    # was a bare debug(...) call: undefined unless star-imported; use the
    # module logger like every other message in this file.
    LOG.debug("Process files that was not remote copied")
    failed_copy_count = len(failed_copy_files)
    for key in failed_copy_files:
        failed_copy_files[key]['target_uri'] = destination_base + key
    seq = _upload(failed_copy_files, seq, src_count + update_count + failed_copy_count)

    # max(1.0, ...) guards against division by zero on a sub-second run.
    total_elapsed = max(1.0, time.time() - timestamp_start)
    outstr = "Done. Copied %d files in %0.1f seconds, %0.2f files/s" % (seq, total_elapsed, seq/total_elapsed)
    LOG.info(outstr)
    return EX_OK
def remote_copy(s3, copy_pairs, destination_base):
    """
    Perform server-side copies for duplicate objects.

    Each element of copy_pairs is a (src_obj, dst1, dst2) triple: dst2 is
    produced by copying the already-uploaded dst1 instead of re-transferring
    the source, saving bandwidth.

    :param s3: S3 connection object
    :param copy_pairs: iterable of (src_obj, dst1, dst2) triples
    :param destination_base: normalised s3:// destination prefix
    :return: (number of pairs attempted, bytes saved by remote copies,
              FileDict of entries that failed and must be uploaded directly)
    """
    saved_bytes = 0
    failed_copy_list = FileDict()
    for (src_obj, dst1, dst2) in copy_pairs:
        LOG.debug(u"Remote Copying from %s to %s" % (dst1, dst2))
        dst1_uri = S3Uri(destination_base + dst1)
        dst2_uri = S3Uri(destination_base + dst2)
        extra_headers = copy(cfg.extra_headers)
        try:
            s3.object_copy(dst1_uri, dst2_uri, extra_headers)
            # object_info does a HEAD on the new object to count saved bytes.
            info = s3.object_info(dst2_uri)
            saved_bytes = saved_bytes + long(info['headers']['content-length'])
            LOG.info(u"remote copy: %s -> %s" % (dst1, dst2))
        except Exception:
            # Narrowed from a bare 'except:' which also swallowed
            # KeyboardInterrupt/SystemExit. Failures here are best-effort:
            # the caller re-uploads anything placed in failed_copy_list.
            LOG.warning(u'Unable to remote copy files %s -> %s' % (dst1_uri, dst2_uri))
            failed_copy_list[dst2] = src_obj
    return (len(copy_pairs), saved_bytes, failed_copy_list)
Cela fonctionne bien si les clés S3 disposent de toutes les permissions S3. Cependant, je veux utiliser un utilisateur IAM avec un sous-ensemble d'autorisations pour appeler cette fonction. Voici ma politique de groupe actuelle :
{
"Statement": [
{
"Sid": "cloneFiles",
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:PutObjectAcl",
"s3:DeleteObject"
],
"Effect": "Allow",
"Resource": [
"arn:aws:s3:::target-bucket/*"
]
}
]
}
Avec cette nouvelle politique, j'ai obtenu ce message d'erreur :
ERROR:root:S3 error: Access Denied
Je veux savoir:
1) Existe-t-il un moyen simple de déterminer quelle permission manque (par exemple via un paramètre ou une variable d'environnement) ? S3 peut-il indiquer quelle permission est requise ? Si oui, comment puis-je le savoir ?
2) Quelqu'un peut-il, en lisant le code ou autrement, identifier la permission manquante?
Pourquoi reprenez-vous le code de s3cmd ? Vous devriez utiliser le [SDK AWS officiel pour Python](https://boto3.readthedocs.io/en/latest/). Vous pouvez également envisager d'utiliser l'[interface en ligne de commande AWS (CLI)](http://aws.amazon.com/cli/), qui contient d'excellentes commandes pour Amazon S3, telles que `aws s3 sync`, qui synchronise les fichiers depuis/vers S3. –
Je dois piloter le processus par programme. –