diff --git a/Makefile b/Makefile
index 9de7bae..b36a9af 100644
--- a/Makefile
+++ b/Makefile
@@ -22,12 +22,12 @@ image:
 	docker build . -t neowaylabs/klb:$(version)
 
 credentials: image guard-sh guard-subscription guard-service-principal guard-service-secret
-	docker run -ti --rm -v `pwd`:/credentials -w /credentials neowaylabs/klbdev:$(version) \
+	docker run -ti --rm -v `pwd`:/credentials -w /credentials neowaylabs/klb:$(version) \
 		/credentials/tools/azure/getcredentials.sh \
 		$(sh) "$(subscription)" "$(service-principal)" "$(service-secret)"
 
 createsp: image guard-subscription-id guard-service-principal guard-service-secret
-	docker run -ti --rm -v `pwd`:/createsp -w /createsp neowaylabs/klbdev:$(version) \
+	docker run -ti --rm -v `pwd`:/createsp -w /createsp neowaylabs/klb:$(version) \
 		/createsp/tools/azure/createsp.sh \
 		"$(subscription-id)" "$(service-principal)" "$(service-secret)"
diff --git a/azure/blob/fs.sh b/azure/blob/fs.sh
index 50c811e..25eb4f8 100644
--- a/azure/blob/fs.sh
+++ b/azure/blob/fs.sh
@@ -17,8 +17,7 @@ import klb/azure/storage
 # on the azure container will be a file named "1".
 #
 # This was the moment when we started to realize that we had been screwed
-# by Azure (again). We never tested the download-batch command supposing that
-# it will behave equally bad (maintaining only the basename of files).
+# by Azure (again). The download-batch command has other quirks and stupidities too.
 #
 # AWS S3 is also flat, but the aws tools and API's provides a proper directory
 # illusion, you can copy directories pretty much the same way you work with
@@ -118,18 +117,42 @@ fn azure_blob_fs_download(fs, localpath, remotepath) {
 
 # Downloads a remote dir to a local dir.
 # It will not create the remote dir on the local dir, only its
-# contents (including other dirs, recursively).
+# contents (including other dirs inside the remote dir, recursively).
 #
-# For example, downloading the remote dir /klb to the local dir /test
-# will copy all contents of /klb to /test, like /test/file1 and /test/dir1/file1
-# but it will not create a "klb" directory inside the local dir.
+# For example, let's say you have these files at azure blob storage:
+#
+# /foo/bar/file1
+# /foo/bar/file2
+# /foo/bar/goo/file1
+#
+# Calling azure_blob_fs_download_dir($fs, "/test", "/foo/bar") will create
+# these files on your filesystem:
+#
+# /test/file1
+# /test/file2
+# /test/goo/file1
 #
 # This is the behavior of Plan9 dircp (http://man.cat-v.org/plan_9/1/tar)
 # and it seems more intuitive than the cp -r behavior of creating only the
-# base dir of the source path at the target path if it does not exists
-# and to copy only the contents you need the hack of regex expansion:
+# base dir of the source path (in this case the remote path)
+# at the target path if it does not exist,
+# and to copy only the contents you need the glob expansion hack:
+#
 # cp -r /srcdir/* /targetdir.
 #
+# A curiosity is that the az tool actually introduces a third behavior with
+# download-batch, which is to reproduce the entire directory path of the remote
+# path on the local path. The example above, according to azure, would result in:
+#
+# /test/foo/bar/file1
+# /test/foo/bar/file2
+# /test/foo/bar/goo/file1
+#
+# This is the only approach that seems to have no upside AT ALL =D.
+# This fs layer does its best to shield you from this kind of nonsense.
+#
+# If localdir does not exist this function will attempt to create it for you.
+#
 # This function returns an error string if it fails and "" if it succeeds.
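A minimal usage sketch of the dircp-like semantics documented above, in nash. The $fs handle and the /foo/bar files are assumptions here (created elsewhere with the package's fs constructor and upload functions):

    err <= azure_blob_fs_download_dir($fs, "/test", "/foo/bar")
    if $err != "" {
        echo $err
    }

On success, /test contains file1, file2 and goo/file1, with no /test/foo/bar prefix.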
fn azure_blob_fs_download_dir(fs, localdir, remotedir) {
 	remotedir <= _azure_storage_fix_remote_path($remotedir)
@@ -141,19 +164,45 @@ fn azure_blob_fs_download_dir(fs, localdir, remotedir) {
 	}
 
 	# TODO: use timeout, right now az cli does not provide timeout for download-batch
-	remotedir = $remotedir + "*"
+	pattern = $remotedir + "*"
 	container <= azure_blob_fs_container($fs)
+
+	mkdir -p $localdir
+	tmplocal <= mktemp -d
+
+	fn cleanup() {
+		rm -rf $tmplocal
+	}
+
 	out, status <= (
 		az storage blob download-batch
-			--destination $localdir
+			--destination $tmplocal
 			--source $container
 			--account-name $account
 			--account-key $accountkey
-			--pattern $remotedir
+			--pattern $pattern
 	)
+
 	if $status != "0" {
+		cleanup()
 		return format("error[%s] downloading dir[%s] to[%s]", $out, $remotedir, $localdir)
 	}
+
+	# WHY GOD WHY ?
+	srcdir <= format("%s/%s", $tmplocal, $remotedir)
+	srcpaths <= ls $srcdir
+	srcpaths <= split($srcpaths, "\n")
+
+	for srcpath in $srcpaths {
+		srcpath <= format("%s/%s", $srcdir, $srcpath)
+		out, status <= cp -r $srcpath $localdir
+		if $status != "0" {
+			cleanup()
+			return format("error copying: %s", $out)
+		}
+	}
+
+	cleanup()
 	return ""
 }
diff --git a/azure/login.sh b/azure/login.sh
index e914700..33b11db 100644
--- a/azure/login.sh
+++ b/azure/login.sh
@@ -20,7 +20,11 @@ fn azure_login() {
 	clientID = $AZURE_CLIENT_ID
 	secretID = $AZURE_CLIENT_SECRET
 	username = $AZURE_SERVICE_PRINCIPAL
+
+	azure_login_credentials($subscriptionID, $tenantID, $clientID, $secretID, $username)
+}
 
+fn azure_login_credentials(subscriptionID, tenantID, clientID, secretID, username) {
 	# azure cli 2.0
 	az account clear
 	az login --service-principal -u $username -p $secretID --tenant $tenantID --output table
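With this split, azure_login() keeps its environment-variable behavior (AZURE_SUBSCRIPTION_ID and friends) while code that already holds credentials can call the new function directly. A sketch mirroring the call shape used in getcredentials.sh below; all values are placeholders:

    azure_login_credentials(
        "subscription-id",
        "tenant-id",
        "client-id",
        "client-secret",
        "http://my-service-principal",
    )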
diff --git a/tests/azure/blobfs_test.go b/tests/azure/blobfs_test.go
index 234ced8..d0d881e 100644
--- a/tests/azure/blobfs_test.go
+++ b/tests/azure/blobfs_test.go
@@ -23,7 +23,6 @@ func TestBlobFS(t *testing.T) {
 		location,
 		testBlobFSUploadsWhenAccountAndContainerExists,
 	)
-
 	testBlobFSDownloadDir(t, timeout, location)
 	testBlobFSUploadDir(t, timeout, location)
 	testBlobFSListFiles(t, timeout, location)
@@ -643,6 +642,16 @@ func testBlobFSDownloadDir(
 	}
 
 	tests := []TestCase{
+		{
+			name: "OneFile",
+			remoteFiles: []string{
+				"/one/test1",
+			},
+			downloadDir: "/one",
+			wantedFiles: []string{
+				"/test1",
+			},
+		},
 		{
 			name: "Root",
 			remoteFiles: []string{
@@ -667,8 +676,8 @@ func testBlobFSDownloadDir(
 			},
 			downloadDir: "/dir",
 			wantedFiles: []string{
-				"/dir/test1",
-				"/dir/test2",
+				"/test1",
+				"/test2",
 			},
 		},
 		{
@@ -683,9 +692,27 @@ func testBlobFSDownloadDir(
 			},
 			downloadDir: "/dir/dir2",
 			wantedFiles: []string{
+				"/test1",
+				"/test2",
+			},
+		},
+		{
+			name: "SubdirsAreDownloaded",
+			remoteFiles: []string{
+				"/test1",
+				"/test2",
+				"/dir/test1",
+				"/dir/test2",
 				"/dir/dir2/test1",
 				"/dir/dir2/test2",
 			},
+			downloadDir: "/dir",
+			wantedFiles: []string{
+				"/test1",
+				"/test2",
+				"/dir2/test1",
+				"/dir2/test2",
+			},
 		},
 	}
 
@@ -702,38 +729,10 @@ func testBlobFSDownloadDir(
 			checkStorageBlobAccount(t, f, fs.account, fs.sku, fs.tier)
 			createStorageAccountContainer(f, fs.account, fs.container)
 
-			wantedFilesContents := map[string]string{}
-
 			for _, remoteFile := range remoteFiles {
-				expectedContent := fixture.NewUniqueName("random-content")
-				localFile, cleanup := setupTestFile(t, expectedContent)
+				localFile, cleanup := setupTestFile(t, remoteFile)
 				fs.Upload(t, f, remoteFile, localFile)
 				cleanup()
-
-				for _, wantedFile := range wantedFiles {
-					if wantedFile == remoteFile {
-						wantedFilesContents[wantedFile] = expectedContent
-					}
-				}
-			}
-
-			if len(wantedFilesContents) != len(wantedFiles) {
-				t.Fatalf(
-					"wanted files [%s] is not a subset of remote files [%s]",
-					wantedFiles,
-					remoteFiles,
-				)
-			}
-
-			for _, wantedFile := range wantedFiles {
-				if _, ok := wantedFilesContents[wantedFile]; !ok {
-					t.Errorf("wanted file [%s] was not uploaded", wantedFile)
-					t.Fatalf(
-						"wanted files [%s] is not a subset of remote files [%s]",
-						wantedFiles,
-						remoteFiles,
-					)
-				}
-			}
 			}
 
 			tmpdir, err := ioutil.TempDir("", "az-download-dir")
@@ -759,32 +758,65 @@ func testBlobFSDownloadDir(
 			assert.NoError(t, err)
 
 			if len(wantedFiles) != len(gotFiles) {
+				t.Errorf("wanted files and got files len don't match")
 				t.Fatalf("want files[%s] got[%s]", wantedFiles, gotFiles)
 			}
+
+			removeFilePrefix := func(path string) string {
+				return strings.TrimPrefix(path, tmpdir)
+			}
+
+			removeFilesPrefix := func(paths []string) []string {
+				trimmed := make([]string, len(paths))
+				for i, path := range paths {
+					trimmed[i] = removeFilePrefix(path)
+				}
+				return trimmed
+			}
+
+			expectedContent := func(path string) string {
+				// WHY: we use the original remote file path as
+				// the content itself (this way the content is unique for each file).
+				return filepath.Join(downloadDir, path)
+			}
 
-			for wantedFile, wantedFileContents := range wantedFilesContents {
+			for _, wantedFile := range wantedFiles {
 				got := false
 				for _, gotFile := range gotFiles {
-					gotFileRelative := strings.TrimPrefix(gotFile, tmpdir)
-					if gotFileRelative == wantedFile {
-						got = true
-						gotContentRaw, err := ioutil.ReadFile(gotFile)
-						assert.NoError(t, err)
-						gotContent := string(gotContentRaw)
-						if wantedFileContents != gotContent {
-							t.Fatalf(
-								"found wanted file[%s] but expected content[%s] != [%s]",
-								wantedFile,
-								wantedFileContents,
-								gotContent,
-							)
-						}
+					gotFileRelative := removeFilePrefix(gotFile)
+					if gotFileRelative != wantedFile {
+						continue
+					}
+
+					got = true
+					f, err := os.Open(gotFile)
+					assert.NoError(t, err, "opening file")
+					defer f.Close()
+
+					contentsr, err := ioutil.ReadAll(f)
+					assert.NoError(t, err, "reading file")
+
+					contents := string(contentsr)
+					wantedContents := expectedContent(wantedFile)
+
+					if contents != wantedContents {
+						t.Fatalf(
+							"wanted file[%s] contents[%s] got[%s]",
+							wantedFile,
+							wantedContents,
+							contents,
+						)
 					}
 				}
 
 				if !got {
-					t.Fatalf("wanted files[%s] got[%s]", wantedFiles, gotFiles)
+					t.Fatalf(
+						"wanted files[%s] != got[%s]",
+						wantedFiles,
+						removeFilesPrefix(gotFiles),
+					)
 				}
 			}
 		})
diff --git a/tools/azure/getcredentials.sh b/tools/azure/getcredentials.sh
index ffd4f27..082a3c8 100755
--- a/tools/azure/getcredentials.sh
+++ b/tools/azure/getcredentials.sh
@@ -1,5 +1,8 @@
 #!/usr/bin/env nash
 
+import klb/azure/login
+
+
 if len($ARGS) != "5" {
 	echo "Usage: " $ARGS[0] "<(sh|nash)> <subscription> <service-principal> <service-secret>"
 	echo
@@ -48,6 +51,18 @@ AZURE_CLIENT_ID <= (
 	tr -d "\n"
 )
 
+echo "testing the credentials"
+
+AZURE_SERVICE_PRINCIPAL = "http://" + $SPNAME
+
+azure_login_credentials(
+	$AZURE_SUBSCRIPTION_ID,
+	$AZURE_TENANT_ID,
+	$AZURE_CLIENT_ID,
+	$SPSECRET,
+	$AZURE_SERVICE_PRINCIPAL,
+)
+
 echo
 echo "environment variables (copy them to a file):"
 echo
@@ -57,6 +72,6 @@ printvar("AZURE_SUBSCRIPTION_NAME", $AZURE_SUBSCRIPTION_NAME)
 printvar("AZURE_TENANT_ID", $AZURE_TENANT_ID)
 printvar("AZURE_CLIENT_ID", $AZURE_CLIENT_ID)
 printvar("AZURE_CLIENT_SECRET", $SPSECRET)
-printvar("AZURE_SERVICE_PRINCIPAL", "http://"+$SPNAME)
+printvar("AZURE_SERVICE_PRINCIPAL", $AZURE_SERVICE_PRINCIPAL)
 echo
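Since the Makefile now runs neowaylabs/klb instead of the klbdev image, the credentials flow can be exercised end to end. A hypothetical invocation, assuming the guard-* prerequisites check make variables with these exact names:

    make credentials sh=nash subscription="My Subscription" \
        service-principal=myapp service-secret=supersecret

getcredentials.sh then logs in with the freshly created service principal ("testing the credentials") before printing the AZURE_* variables, so a broken credential fails fast instead of surfacing on first use.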