diff --git a/lib/active_storage/service/azure_blob_service.rb b/lib/active_storage/service/azure_blob_service.rb
index f4ea673..7d951c1 100644
--- a/lib/active_storage/service/azure_blob_service.rb
+++ b/lib/active_storage/service/azure_blob_service.rb
@@ -123,10 +123,9 @@ def headers_for_direct_upload(key, content_type:, checksum:, filename: nil, disp
     def compose(source_keys, destination_key, filename: nil, content_type: nil, disposition: nil, custom_metadata: {})
       content_disposition = content_disposition_with(type: disposition, filename: filename) if disposition && filename
 
-      # use copy_blob operation if composing a new blob from a single existing blob
-      # and that single blob is <= 256 MiB which is the upper limit for copy_blob operation
-      if source_keys.length == 1 && client.get_blob_properties(source_keys[0]).size <= 256.megabytes
-        client.copy_blob(destination_key, source_keys[0], metadata: custom_metadata)
+      # use put_blob operation if composing a new blob from a single existing blob
+      if source_keys.length == 1
+        client.put_blob(destination_key, source_keys[0], metadata: custom_metadata)
       else
         client.create_append_blob(
           destination_key,
diff --git a/lib/azure_blob/client.rb b/lib/azure_blob/client.rb
index 222d294..bae5e8a 100644
--- a/lib/azure_blob/client.rb
+++ b/lib/azure_blob/client.rb
@@ -333,6 +333,78 @@ def append_blob_block(key, content, options = {})
       Http.new(uri, headers, signer:).put(content)
     end
 
+    # Creates a blob from an existing blob between containers or within the same container
+    #
+    # Calls to {Put Blob From URL}[https://learn.microsoft.com/en-us/rest/api/storageservices/put-blob-from-url]
+    # when the source blob fits in a single block; larger blobs are copied block by block.
+    #
+    # The content size, content type, checksum and content disposition are read from the
+    # source blob and override any value passed in options.
+    #
+    # Parameters:
+    # - key: destination blob path
+    # - source_key: source blob path
+    # - options: additional options (the hash itself is not modified)
+    #   - source_client: AzureBlob::Client instance for the source container (optional)
+    #     If not provided, copies from within the same container
+    #   - block_size: largest source size in bytes copied with a single request (optional)
+    #     Defaults to DEFAULT_BLOCK_SIZE
+    #
+    def put_blob(key, source_key, options = {})
+      source_client = options[:source_client] || self
+      source_blob = source_client.get_blob_properties(source_key)
+
+      # Merge into a new hash so the caller's options are never mutated.
+      copy_options = options.merge(
+        content_size: source_blob.size,
+        content_type: source_blob.content_type,
+        content_md5: source_blob.checksum,
+        content_disposition: source_blob.content_disposition,
+      )
+
+      if source_blob.size <= (copy_options[:block_size] || DEFAULT_BLOCK_SIZE)
+        put_blob_from_url_single(key, source_key, **copy_options)
+      else
+        put_blob_from_url_multiple(key, source_key, **copy_options)
+      end
+    end
+
+    # Uploads a block to a blob, reading the block content from a source URL.
+    #
+    # Calls to {Put Block From URL}[https://learn.microsoft.com/en-us/rest/api/storageservices/put-block-from-url]
+    #
+    # Parameters:
+    # - key: destination blob path
+    # - source_uri: URI of the source blob, sent as the x-ms-copy-source header
+    # - index: zero-based position of the block; determines the block id and the source byte range
+    # - block_size: size of a block in bytes
+    # - source_size: size of the source blob in bytes
+    # - options: additional options
+    #   - timeout: sent as the timeout query parameter (optional)
+    #
+    # Returns the id of the block. Required to commit the list of blocks to a blob.
+    def put_blob_block_from_url(key, source_uri, index, block_size, source_size, options = {})
+      block_id = generate_block_id(index)
+      uri = generate_uri("#{container}/#{key}")
+      query = { comp: "block", blockid: block_id }
+      query[:timeout] = options[:timeout] if options[:timeout]
+      uri.query = URI.encode_www_form(**query)
+
+      # Inclusive byte range of this block, clamped so the last block stops at the end of the source.
+      first_byte = index * block_size
+      last_byte = [first_byte + block_size, source_size].min - 1
+
+      headers = {
+        "Content-Length": 0,
+        "x-ms-copy-source": source_uri.to_s,
+        "x-ms-source-range": "bytes=#{first_byte}-#{last_byte}",
+      }.merge(additional_headers(options))
+
+      Http.new(uri, headers, signer:, **options.slice(:metadata, :tags)).put
+
+      block_id
+    end
+
     # Uploads a block to a blob.
# # Calls to {Put Block}[https://learn.microsoft.com/en-us/rest/api/storageservices/put-block] @@ -428,6 +478,38 @@ def put_blob_single(key, content, options = {}) Http.new(uri, headers, signer:, **options.slice(:metadata, :tags)).put(content.read) end + def put_blob_from_url_multiple(key, source_key, options = {}) + source_client = options.delete(:source_client) || self + + # generate source uri valid for 1 hour + source_uri = source_client.signed_uri(source_key, permissions: "r", expiry: Time.at(Time.now.to_i + 3600).utc.iso8601) + + block_size = options[:block_size] || DEFAULT_BLOCK_SIZE + block_count = (options[:content_size].to_f / block_size).ceil + block_ids = block_count.times.map do |i| + put_blob_block_from_url(key, source_uri, i, block_size, options[:content_size], options.slice(:timeout)) + end + + commit_blob_blocks(key, block_ids, options) + end + + def put_blob_from_url_single(key, source_key, options = {}) + source_client = options.delete(:source_client) || self + uri = generate_uri("#{container}/#{key}") + uri.query = URI.encode_www_form(timeout: options[:timeout]) if options[:timeout] + + # generate source uri valid for 1 hour + source_uri = source_client.signed_uri(source_key, permissions: "r", expiry: Time.at(Time.now.to_i + 3600).utc.iso8601) + + headers = { + "Content-Length": 0, + "x-ms-copy-source": source_uri.to_s, + "x-ms-blob-type": "BlockBlob", + }.merge(additional_headers(options)) + + Http.new(uri, headers, signer:, **options.slice(:metadata, :tags)).put + end + def content_size(content) if content.respond_to?(:bytesize) content.bytesize diff --git a/test/client/test_client.rb b/test/client/test_client.rb index 77d43a9..d727a58 100644 --- a/test/client/test_client.rb +++ b/test/client/test_client.rb @@ -190,6 +190,26 @@ def test_copy assert_equal content, client.get_blob(copy_key) end + def test_single_block_put_blob_from_url + client.create_block_blob(key, content) + assert_equal content, client.get_blob(key) + + copy_key = 
"#{key}_copy" + client.put_blob(copy_key, key) + + assert_equal content, client.get_blob(copy_key) + end + + def test_multi_block_put_blob_from_url + client.create_block_blob(key, content) + assert_equal content, client.get_blob(key) + + copy_key = "#{key}_copy" + client.put_blob(copy_key, key, block_size: 1) + + assert_equal content, client.get_blob(copy_key) + end + def test_delete client.create_block_blob(key, content) assert_equal content, client.get_blob(key)