From 923b6d12b22daa75bfd01c4c6759805a18e29581 Mon Sep 17 00:00:00 2001 From: Eric Enns <492127+ericenns@users.noreply.github.com> Date: Thu, 5 Feb 2026 08:13:23 -0600 Subject: [PATCH 1/7] chore: add in put_blob operation and update compose to use that instead of copy_blob --- .../service/azure_blob_service.rb | 8 +++--- lib/azure_blob/client.rb | 27 +++++++++++++++++++ 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/lib/active_storage/service/azure_blob_service.rb b/lib/active_storage/service/azure_blob_service.rb index f4ea673..38d25e7 100644 --- a/lib/active_storage/service/azure_blob_service.rb +++ b/lib/active_storage/service/azure_blob_service.rb @@ -123,10 +123,10 @@ def headers_for_direct_upload(key, content_type:, checksum:, filename: nil, disp def compose(source_keys, destination_key, filename: nil, content_type: nil, disposition: nil, custom_metadata: {}) content_disposition = content_disposition_with(type: disposition, filename: filename) if disposition && filename - # use copy_blob operation if composing a new blob from a single existing blob - # and that single blob is <= 256 MiB which is the upper limit for copy_blob operation - if source_keys.length == 1 && client.get_blob_properties(source_keys[0]).size <= 256.megabytes - client.copy_blob(destination_key, source_keys[0], metadata: custom_metadata) + # use put_blob operation if composing a new blob from a single existing blob + # and that single blob is <= 5000 MiB which is the upper limit for put_blob operation + if source_keys.length == 1 && client.get_blob_properties(source_keys[0]).size <= 5000.megabytes + client.put_blob(destination_key, source_keys[0], metadata: custom_metadata) else client.create_append_blob( destination_key, diff --git a/lib/azure_blob/client.rb b/lib/azure_blob/client.rb index 222d294..3d2bf4c 100644 --- a/lib/azure_blob/client.rb +++ b/lib/azure_blob/client.rb @@ -333,6 +333,33 @@ def append_blob_block(key, content, options = {}) Http.new(uri, headers, 
signer:).put(content) end + # Creates a blob from an existing blob between containers or within the same container + # + # Calls to {Put Blob From URL}[https://learn.microsoft.com/en-us/rest/api/storageservices/put-blob-from-url] + # + # Parameters: + # - key: destination blob path + # - source_key: source blob path + # - options: additional options + # - source_client: AzureBlob::Client instance for the source container (optional) + # If not provided, copies from within the same container + # + def put_blob(key, source_key, options = {}) + source_client = options.delete(:source_client) || self + uri = generate_uri("#{container}/#{key}") + uri.query = URI.encode_www_form(timeout: options[:timeout]) if options[:timeout] + + source_uri = source_client.signed_uri(source_key, permissions: "r", expiry: Time.at(Time.now.to_i + 300).utc.iso8601) + + headers = { + "Content-Length": 0, + "x-ms-copy-source": source_uri.to_s, + "x-ms-blob-type": "BlockBlob", + }.merge(additional_headers(options)) + + Http.new(uri, headers, signer:, **options.slice(:metadata, :tags)).put + end + # Uploads a block to a blob. 
# # Calls to {Put Block}[https://learn.microsoft.com/en-us/rest/api/storageservices/put-block] From 1afebab7df4cbdb47642d4f1156f8df65c7f1eee Mon Sep 17 00:00:00 2001 From: Eric Enns <492127+ericenns@users.noreply.github.com> Date: Thu, 5 Feb 2026 10:46:24 -0600 Subject: [PATCH 2/7] chore: increase expiry time for source blob in put_blob to 15 minutes from 5 minutes --- lib/azure_blob/client.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/azure_blob/client.rb b/lib/azure_blob/client.rb index 3d2bf4c..cce48e3 100644 --- a/lib/azure_blob/client.rb +++ b/lib/azure_blob/client.rb @@ -349,7 +349,7 @@ def put_blob(key, source_key, options = {}) uri = generate_uri("#{container}/#{key}") uri.query = URI.encode_www_form(timeout: options[:timeout]) if options[:timeout] - source_uri = source_client.signed_uri(source_key, permissions: "r", expiry: Time.at(Time.now.to_i + 300).utc.iso8601) + source_uri = source_client.signed_uri(source_key, permissions: "r", expiry: Time.at(Time.now.to_i + 900).utc.iso8601) headers = { "Content-Length": 0, From 74379bf2abcf88ee342c823be1773dc470a41815 Mon Sep 17 00:00:00 2001 From: Eric Enns <492127+ericenns@users.noreply.github.com> Date: Thu, 5 Feb 2026 11:45:35 -0600 Subject: [PATCH 3/7] chore: limit put_blob within compose to blobs <= 1000 MiB --- lib/active_storage/service/azure_blob_service.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/active_storage/service/azure_blob_service.rb b/lib/active_storage/service/azure_blob_service.rb index 38d25e7..c55d34e 100644 --- a/lib/active_storage/service/azure_blob_service.rb +++ b/lib/active_storage/service/azure_blob_service.rb @@ -124,8 +124,8 @@ def compose(source_keys, destination_key, filename: nil, content_type: nil, disp content_disposition = content_disposition_with(type: disposition, filename: filename) if disposition && filename # use put_blob operation if composing a new blob from a single existing blob - # and that single blob is <= 
5000 MiB which is the upper limit for put_blob operation - if source_keys.length == 1 && client.get_blob_properties(source_keys[0]).size <= 5000.megabytes + # and that single blob is <= 1000 MiB which is under the upper limit of 5000 MiB for put_blob operation + if source_keys.length == 1 && client.get_blob_properties(source_keys[0]).size <= 1000.megabytes client.put_blob(destination_key, source_keys[0], metadata: custom_metadata) else client.create_append_blob( From e1a677adb1b4c97840d3e9fee349c7048aca7c91 Mon Sep 17 00:00:00 2001 From: Eric Enns <492127+ericenns@users.noreply.github.com> Date: Thu, 5 Feb 2026 12:50:35 -0600 Subject: [PATCH 4/7] chore: refactor put-blob to use put block from url when > 128MiB --- .../service/azure_blob_service.rb | 3 +- lib/azure_blob/client.rb | 67 +++++++++++++++++-- 2 files changed, 62 insertions(+), 8 deletions(-) diff --git a/lib/active_storage/service/azure_blob_service.rb b/lib/active_storage/service/azure_blob_service.rb index c55d34e..7d951c1 100644 --- a/lib/active_storage/service/azure_blob_service.rb +++ b/lib/active_storage/service/azure_blob_service.rb @@ -124,8 +124,7 @@ def compose(source_keys, destination_key, filename: nil, content_type: nil, disp content_disposition = content_disposition_with(type: disposition, filename: filename) if disposition && filename # use put_blob operation if composing a new blob from a single existing blob - # and that single blob is <= 1000 MiB which is under the upper limit of 5000 MiB for put_blob operation - if source_keys.length == 1 && client.get_blob_properties(source_keys[0]).size <= 1000.megabytes + if source_keys.length == 1 client.put_blob(destination_key, source_keys[0], metadata: custom_metadata) else client.create_append_blob( diff --git a/lib/azure_blob/client.rb b/lib/azure_blob/client.rb index cce48e3..04c0ad3 100644 --- a/lib/azure_blob/client.rb +++ b/lib/azure_blob/client.rb @@ -345,19 +345,42 @@ def append_blob_block(key, content, options = {}) # If not 
provided, copies from within the same container # def put_blob(key, source_key, options = {}) - source_client = options.delete(:source_client) || self - uri = generate_uri("#{container}/#{key}") - uri.query = URI.encode_www_form(timeout: options[:timeout]) if options[:timeout] + source_client = options.fetch(:source_client, self) + source_blob = source_client.get_blob_properties(source_key) - source_uri = source_client.signed_uri(source_key, permissions: "r", expiry: Time.at(Time.now.to_i + 900).utc.iso8601) + options[:content_size] = source_blob.size + options[:content_type] = source_blob.content_type + options[:content_md5] = source_blob.checksum + options[:content_disposition] = source_blob.content_disposition + + if source_blob.size <= (options[:block_size] || DEFAULT_BLOCK_SIZE) + put_blob_from_url_single(key, source_key, **options) + else + put_blob_from_url_multiple(key, source_key, **options) + end + end + + # Creates a block of a blob from a byte range read from a source URL. + # + # Calls to {Put Block From URL}[https://learn.microsoft.com/en-us/rest/api/storageservices/put-block-from-url] + # + # Returns the id of the block. Required to commit the list of blocks to a blob. + def put_blob_block_from_url(key, source_uri, index, block_size, options = {}) + block_id = generate_block_id(index) + uri = generate_uri("#{container}/#{key}") + query = { comp: "block", blockid: block_id } + query[:timeout] = options[:timeout] if options[:timeout] + uri.query = URI.encode_www_form(**query) headers = { "Content-Length": 0, "x-ms-copy-source": source_uri.to_s, - "x-ms-blob-type": "BlockBlob", + "x-ms-source-range": "bytes=#{index * block_size}-#{[(index + 1) * block_size - 1, source_uri.size - 1].min}", }.merge(additional_headers(options)) - Http.new(uri, headers, signer:, **options.slice(:metadata, :tags)).put + Http.new(uri, headers, signer:, **options.slice(:metadata, :tags)).put(content) + + block_id end # Uploads a block to a blob. 
@@ -455,6 +478,38 @@ def put_blob_single(key, content, options = {}) Http.new(uri, headers, signer:, **options.slice(:metadata, :tags)).put(content.read) end + def put_blob_from_url_multiple(key, source_key, options = {}) + source_client = options.delete(:source_client) || self + + # generate source uri valid for 1 hour + source_uri = source_client.signed_uri(source_key, permissions: "r", expiry: Time.at(Time.now.to_i + 3600).utc.iso8601) + + block_size = options[:block_size] || DEFAULT_BLOCK_SIZE + block_count = (source_client.get_blob_properties(source_key).size.to_f / block_size).ceil + block_ids = block_count.times.map do |i| + put_blob_block_from_url(key, source_uri, i, block_size, options.slice(:timeout)) + end + + commit_blob_blocks(key, block_ids, options) + end + + def put_blob_from_url_single(key, source_key, options = {}) + source_client = options.delete(:source_client) || self + uri = generate_uri("#{container}/#{key}") + uri.query = URI.encode_www_form(timeout: options[:timeout]) if options[:timeout] + + # generate source uri valid for 1 hour + source_uri = source_client.signed_uri(source_key, permissions: "r", expiry: Time.at(Time.now.to_i + 3600).utc.iso8601) + + headers = { + "Content-Length": 0, + "x-ms-copy-source": source_uri.to_s, + "x-ms-blob-type": "BlockBlob", + }.merge(additional_headers(options)) + + Http.new(uri, headers, signer:, **options.slice(:metadata, :tags)).put + end + def content_size(content) if content.respond_to?(:bytesize) content.bytesize From 148f98f91fccbed0a395f04d48df63d6277f2d9e Mon Sep 17 00:00:00 2001 From: Eric Enns <492127+ericenns@users.noreply.github.com> Date: Thu, 5 Feb 2026 13:09:35 -0600 Subject: [PATCH 5/7] chore: fix issue with put_block_from_url where source_size was not used correctly --- lib/azure_blob/client.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/azure_blob/client.rb b/lib/azure_blob/client.rb index 04c0ad3..a0e514a 100644 --- a/lib/azure_blob/client.rb +++ 
b/lib/azure_blob/client.rb @@ -365,7 +365,7 @@ def put_blob(key, source_key, options = {}) # Calls to {Put Block From URL}[https://learn.microsoft.com/en-us/rest/api/storageservices/put-block-from-url] # # Returns the id of the block. Required to commit the list of blocks to a blob. - def put_blob_block_from_url(key, source_uri, index, block_size, options = {}) + def put_blob_block_from_url(key, source_uri, index, block_size, source_size, options = {}) block_id = generate_block_id(index) uri = generate_uri("#{container}/#{key}") query = { comp: "block", blockid: block_id } @@ -375,7 +375,7 @@ def put_blob_block_from_url(key, source_uri, index, block_size, options = {}) headers = { "Content-Length": 0, "x-ms-copy-source": source_uri.to_s, - "x-ms-source-range": "bytes=#{index * block_size}-#{[(index + 1) * block_size - 1, source_uri.size - 1].min}", + "x-ms-source-range": "bytes=#{index * block_size}-#{[(index + 1) * block_size - 1, source_size - 1].min}", }.merge(additional_headers(options)) Http.new(uri, headers, signer:, **options.slice(:metadata, :tags)).put(content) @@ -485,9 +485,9 @@ def put_blob_from_url_multiple(key, source_key, options = {}) source_uri = source_client.signed_uri(source_key, permissions: "r", expiry: Time.at(Time.now.to_i + 3600).utc.iso8601) block_size = options[:block_size] || DEFAULT_BLOCK_SIZE - block_count = (source_client.get_blob_properties(source_key).size.to_f / block_size).ceil + block_count = (options[:content_size].to_f / block_size).ceil block_ids = block_count.times.map do |i| - put_blob_block_from_url(key, source_uri, i, block_size, options.slice(:timeout)) + put_blob_block_from_url(key, source_uri, i, block_size, options[:content_size], options.slice(:timeout)) end commit_blob_blocks(key, block_ids, options) From 17aee7e39d9e3de5608f486bf432b2bca76358e4 Mon Sep 17 00:00:00 2001 From: Eric Enns <492127+ericenns@users.noreply.github.com> Date: Thu, 5 Feb 2026 13:33:19 -0600 Subject: [PATCH 6/7] chore: remove content argument 
from Http put call in put_blob_block_from_url --- lib/azure_blob/client.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/azure_blob/client.rb b/lib/azure_blob/client.rb index a0e514a..bae5e8a 100644 --- a/lib/azure_blob/client.rb +++ b/lib/azure_blob/client.rb @@ -378,7 +378,7 @@ def put_blob_block_from_url(key, source_uri, index, block_size, source_size, opt "x-ms-source-range": "bytes=#{index * block_size}-#{[(index + 1) * block_size - 1, source_size - 1].min}", }.merge(additional_headers(options)) - Http.new(uri, headers, signer:, **options.slice(:metadata, :tags)).put(content) + Http.new(uri, headers, signer:, **options.slice(:metadata, :tags)).put block_id end From e4254b16f3852be8808986d48658ea6dae81c69c Mon Sep 17 00:00:00 2001 From: Eric Enns <492127+ericenns@users.noreply.github.com> Date: Thu, 5 Feb 2026 14:09:15 -0600 Subject: [PATCH 7/7] chore: add in tests for client.put_blob --- test/client/test_client.rb | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/client/test_client.rb b/test/client/test_client.rb index 77d43a9..d727a58 100644 --- a/test/client/test_client.rb +++ b/test/client/test_client.rb @@ -190,6 +190,26 @@ def test_copy assert_equal content, client.get_blob(copy_key) end + def test_single_block_put_blob_from_url + client.create_block_blob(key, content) + assert_equal content, client.get_blob(key) + + copy_key = "#{key}_copy" + client.put_blob(copy_key, key) + + assert_equal content, client.get_blob(copy_key) + end + + def test_multi_block_put_blob_from_url + client.create_block_blob(key, content) + assert_equal content, client.get_blob(key) + + copy_key = "#{key}_copy" + client.put_blob(copy_key, key, block_size: 1) + + assert_equal content, client.get_blob(copy_key) + end + def test_delete client.create_block_blob(key, content) assert_equal content, client.get_blob(key)