Merge pull request #1 from emilyzfliu/1214update
merge to local main branch
emilyzfliu authored Dec 21, 2020
2 parents 7be6907 + 0d62619 commit f2f28e9
Showing 8 changed files with 426 additions and 36 deletions.
60 changes: 60 additions & 0 deletions client.py
@@ -0,0 +1,60 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import tensorflow as tf
import os
import uuid
import glob
from make_model import sample_net
from file_reader import make_TFRecord_from_nii

class Client():
    def __init__(self, clientid):
        self.client_id = clientid
        shape = self.load_input_output()
        self.model = sample_net(shape)
        self.save_prior()

    def load_input_output(self):
        # Build a TFRecord from the nifti files on disk and open it as a dataset.
        record_file = 'tfrecord_' + self.client_id + '.tfrec'
        shape = make_TFRecord_from_nii('data', '*_imgs*', '*_labels*', record_file)
        self.dataset = tf.data.TFRecordDataset(record_file)
        return shape
        # previous placeholder inputs/outputs from the tf.keras.Model docs:
        #self.inputs = tf.keras.Input(shape=(3,))
        #x = tf.keras.layers.Dense(4, activation=tf.nn.relu)(self.inputs)
        #self.outputs = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x)

    def save_prior(self):
        # Snapshot the freshly initialized weights as this client's prior.
        self.model.save_weights('prior-' + self.client_id + '.h5', save_format='h5')

    def train(self):
        _op = 'adam'
        _loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        _metrics = ['accuracy']
        self.model.compile(optimizer=_op, loss=_loss, metrics=_metrics)
        self.model.fit(self.dataset, epochs=1, verbose=2)

    def load_consolidated(self):
        # Load the most recently written consolidated checkpoint from the server dir.
        list_of_files = glob.glob('server/*')
        latest_file = max(list_of_files, key=os.path.getctime)
        self.model.load_weights(latest_file)
        return latest_file

    def save_weights(self):
        filename = 'server/consolidated-' + self.client_id + '-' + str(uuid.uuid4()) + '.h5'
        self.model.save_weights(filename, save_format='h5')

# example usage

a = Client('A')
try:
    a.load_consolidated()
except (ValueError, OSError):
    # no consolidated checkpoint available yet
    pass
a.train()
a.save_weights()
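
Note that Client.train() fits directly on the raw TFRecordDataset, which yields serialized protocol buffers rather than (image, label) tensors, so a decoding step is still needed before training works end to end. A minimal sketch of that step, assuming the feature keys and the 40x40x40 crop written by file_reader.py (the batch size and the added channel axis are illustrative choices, not part of the commit):

import tensorflow as tf

def parse_example(serialized):
    # feature keys match those written by file_reader.make_TFRecord_from_nii
    features = tf.io.parse_single_example(serialized, {
        'image_raw': tf.io.FixedLenFeature([], tf.string),
        'label_raw': tf.io.FixedLenFeature([], tf.string),
    })
    # volumes were serialized as int16 bytes from a 40x40x40 crop
    image = tf.reshape(tf.io.decode_raw(features['image_raw'], tf.int16), (40, 40, 40))
    label = tf.reshape(tf.io.decode_raw(features['label_raw'], tf.int16), (40, 40, 40))
    # add a channel axis for the 3D convolution and cast to training dtypes
    return tf.cast(image, tf.float32)[..., tf.newaxis], tf.cast(label, tf.int32)

dataset = tf.data.TFRecordDataset('tfrecord_A.tfrec').map(parse_example).batch(1)
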
34 changes: 21 additions & 13 deletions dwc_client.py
Original file line number Diff line number Diff line change
@@ -5,15 +5,19 @@
import tensorflow_probability as tfp
import os
import numpy as np
-import asyncio
-from fastapi import FastAPI, WebSocket, File, UploadFile
-from fastapi.responses import HTMLResponse, FileResponse
-import websockets
-import uuid
+#import asyncio
+from fastapi import FastAPI, File, UploadFile
+from fastapi.responses import FileResponse
+#import websockets
+#import uuid
import glob

app = FastAPI()

client_id = 'A'
model = None

# make dummy input/output data

inputs = None
outputs = None

@@ -22,25 +26,29 @@ def sample_net(input_shape,
               activation = tf.nn.relu,
               batch_size = None):
    def one_layer(x, dilation_rate=(1, 1, 1)):
-        x = tfpl.Convolution3DReparameterization(
-            filters,
+        x = tfp.layers.Convolution3DReparameterization(
+            filters=2,  # assumption: the committed call omits the required filters argument
            kernel_size=3,
            padding="same",
            dilation_rate=dilation_rate,
            activation=activation,
            name="layer/vwnconv3d",
        )(x)
-        x = tfkl.Activation(activation, name="layer/activation")(x)
+        x = tf.keras.layers.Activation(activation, name="layer/activation")(x)
        return x

    inputs = tf.keras.Input(shape=input_shape, batch_size=batch_size, name="inputs")
    x = one_layer(inputs)

    return tf.keras.Model(inputs=inputs, outputs=x)

-def train(inputs, outputs):
-    if model == None:
-        model = sample_net(np.shape(inputs))
+model = sample_net(np.shape(inputs))
+model.save_weights('prior-'+client_id+'.h5', save_format='h5')
+
+def train(inputs, outputs, model):
    # input shape: 4 x 1
    _op = 'adam'
    _loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
@@ -62,5 +70,5 @@ async def send_weights():
@app.post("/")
async def load_consolidated():
    model.load_weights(most_recent_consolidated())
-    train()
+    train(inputs, outputs, model)
    return {'consolidated weights': model}
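
These routes are plain FastAPI endpoints, so the client app can be served with uvicorn. A minimal sketch, assuming a local run (the host and port are illustrative):

import uvicorn

# blocks until interrupted; run in its own process
uvicorn.run("dwc_client:app", host="127.0.0.1", port=8001)
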
35 changes: 35 additions & 0 deletions dwc_implementation.py
@@ -0,0 +1,35 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Aakanksha Rana, Emi Z Liu
"""
import numpy as np

def distributed_weight_consolidation(model_weights, model_priors):
    # model_weights: per-client posterior weight lists; models = [model1, model2, model3...]
    # each list interleaves layer parameters as [mean_0, std_0, mean_1, std_1, ...]
    # model_priors: the matching prior weight lists saved before training
    num_layers = len(model_weights[0]) // 2
    num_datasets = np.shape(model_weights)[0]
    consolidated_model = model_weights[0]
    mean_idx = [i for i in range(len(model_weights[0])) if i % 2 == 0]
    std_idx = [i for i in range(len(model_weights[0])) if i % 2 != 0]
    ep = 1e-5  # guards against division by near-zero variances
    for i in range(num_layers):
        num_1 = 0; num_2 = 0
        den_1 = 0; den_2 = 0
        for m in range(num_datasets):
            model = model_weights[m]
            prior = model_priors[m]
            mu_s = model[mean_idx[i]]    # posterior mean
            mu_o = prior[mean_idx[i]]    # prior mean
            sig_s = model[std_idx[i]]    # posterior std
            sig_o = prior[std_idx[i]]    # prior std
            d1 = np.power(sig_s, 2) + ep
            d2 = np.power(sig_o, 2) + ep
            num_1 += (mu_s / d1)
            num_2 += (mu_o / d2)
            den_1 += (1.0 / d1)
            if m != num_datasets - 1:
                # only num_datasets-1 prior precisions enter the denominator
                den_2 += (1.0 / d2)
        # precision-weighted combination; the value stored at std_idx is a variance
        consolidated_model[mean_idx[i]] = (num_1 - num_2) / (den_1 - den_2)
        consolidated_model[std_idx[i]] = 1 / (den_1 - den_2)
    return consolidated_model
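
In effect each layer is consolidated by precision weighting: the consolidated mean is (sum over clients of mu_s/sig_s^2, minus the prior terms mu_o/sig_o^2) divided by (sum of the posterior precisions 1/sig_s^2, minus num_datasets-1 of the prior precisions 1/sig_o^2), and the value written back at the std index is the resulting variance rather than a standard deviation. A toy sanity check, assuming only the interleaved [mean, std, ...] layout the function expects:

import numpy as np
from dwc_implementation import distributed_weight_consolidation

# two clients, one layer of two weights; each list interleaves [mean, std]
posteriors = [
    [np.array([0.5, 1.0]), np.array([0.1, 0.2])],  # client 1
    [np.array([0.7, 0.9]), np.array([0.1, 0.2])],  # client 2
]
priors = [
    [np.array([0.0, 0.0]), np.array([1.0, 1.0])],  # client 1 prior
    [np.array([0.0, 0.0]), np.array([1.0, 1.0])],  # client 2 prior
]

consolidated = distributed_weight_consolidation(posteriors, priors)
print(consolidated[0])  # consolidated means
print(consolidated[1])  # consolidated variances (note: variance, not std)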

54 changes: 31 additions & 23 deletions dwc_server.py
@@ -3,10 +3,10 @@

import tensorflow as tf
import os
-import asyncio
-from fastapi import FastAPI, WebSocket, File, UploadFile
-from fastapi.responses import HTMLResponse, FileResponse
-import websockets
+#import asyncio
+from fastapi import FastAPI, File, UploadFile
+from fastapi.responses import FileResponse
+#import websockets
import copy
import numpy as np
import uuid
@@ -16,10 +16,12 @@
datasets = []
num_clients = 1
model = None
+priors = []
+client_ids = []

-@app.get('/')
-async def root:
-    return 'root'
+@app.get("/")
+async def root():
+    return "root"

@app.get("/{model}/")
async def update_model(model: tf.keras.Model):
@@ -33,39 +35,44 @@ async def load_weights(image: UploadFile = File(...)): #do we still need to load
    return {'consolidated weights': model}

def load_data():
    # reset the module-level lists before re-scanning for client checkpoints
    global datasets, client_ids
    datasets = []
    client_ids = []
    path = os.getcwd()
    for root, dirs, files in os.walk(path):
        for filename in files:
            if filename[:6] != 'model-':
                continue
            model.load_weights(filename)
            datasets.append(copy.deepcopy(model))
            client_ids.append(filename[6:-3])

def load_priors():
    if not priors:
        # assumes `model` already holds the shared architecture; a fresh copy
        # serves as the template for loading each client's prior weights
        prior = copy.deepcopy(model)
        for client in client_ids:
            filename = 'prior-' + client + '.h5'
            prior.load_weights(filename)
            priors.append(copy.deepcopy(prior))

async def dwc_frame():
    # load client weights and the matching priors
    load_data()
    load_priors()

    # TODO: only run consolidation once all (or most) client weights have arrived

    # dwc implementation; note it takes and returns weight lists, so the result
    # still has to be mapped back onto a tf.keras model before saving
    model = await distributed_weight_consolidation(datasets, priors)

    model.save_weights(
        'server/consolidated-' + str(uuid.uuid4()) + '.h5',
        save_format='h5')

    await update_model(model)

# TODO: revisit the consolidation routine below

-async def distributed_weight_consolidation(models_weights, model_priors):
+async def distributed_weight_consolidation(model_weights, model_priors):
    # model_weights is a list of per-client weight lists; models = [model1, model2, model3...]
    num_layers = int(len(model_weights[0])/2.0)
    num_datasets = np.shape(model_weights)[0]
    consolidated_model = model_weights[0]
-    mean_idx = [i for i in range(0,len(model_weights[0])) if i % 2 == 0]
-    std_idx = [i for i in range(0,len(model_weights[0])) if i % 2 != 0]
+    mean_idx = [i for i in range(len(model_weights[0])) if i % 2 == 0]
+    std_idx = [i for i in range(len(model_weights[0])) if i % 2 != 0]
    ep = 1e-5
    for i in range(num_layers):
        num_1 = 0; num_2 = 0
@@ -78,10 +85,11 @@ async def distributed_weight_consolidation(models_weights, model_priors):
            sig_s = model[std_idx[i]]
            sig_o = prior[std_idx[i]]
            d1 = np.power(sig_s,2) + ep; d2 = np.power(sig_o,2) + ep
-            num_1 = num_1 + (mu_s/d1)
-            num_2 = num_2 + (mu_o/d2)
-            den_1 = den_1 + (1.0/d1)
-            den_2 = den_2 + (1.0/d2)
+            num_1 += (mu_s/d1)
+            num_2 += (mu_o/d2)
+            den_1 += (1.0/d1)
+            if m != num_datasets-1:
+                den_2 += (1.0/d2)
        consolidated_model[mean_idx[i]] = (num_1 - num_2)/(den_1 - den_2)
        consolidated_model[std_idx[i]] = 1/(den_1 - den_2)
    return consolidated_model
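
The server is likewise a standard FastAPI app. A sketch for exercising it locally, assuming an illustrative host and port (the root route simply confirms the server is up):

import uvicorn

# blocks until interrupted; run in its own process
uvicorn.run("dwc_server:app", host="127.0.0.1", port=8000)

# then, from another process:
import requests
print(requests.get("http://127.0.0.1:8000/").json())  # -> "root"
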
124 changes: 124 additions & 0 deletions file_reader.py
@@ -0,0 +1,124 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 14 15:33:42 2020
@author: Emi Z Liu
"""

# from https://github.com/corticometrics/neuroimage-tensorflow genTFrecord.py


# Creates a .tfrecord file from a directory of nifti images.
# This assumes your niftis are sorted into subdirectories, and a regex
# can be written to match the volume filenames and label filenames.
#
# USAGE
# python ./genTFrecord.py <data-dir> <input-vol-regex> <label-vol-regex>
# EXAMPLE:
# python ./genTFrecord.py ./buckner40 'norm' 'aseg' buckner40.tfrecords
#
# Based off of this:
# http://warmspringwinds.github.io/tensorflow/tf-slim/2016/12/21/tfrecords-guide/

# imports
import tensorflow as tf
import nibabel as nib
import os, re
import numpy as np

def make_TFRecord_from_nii(data_dir, v_regex, l_regex, outfile):
    # returns the input volume shape so the caller can build a matching model

    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    def _int64_feature(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    def select_hipp(x):
        # 17 is the FreeSurfer aseg label for the left hippocampus
        x[x != 17] = 0
        x[x == 17] = 1
        return x

    def crop_brain(x):
        # take a volume zoomed in on the hippocampus area
        x = x[90:130, 90:130, 90:130]
        return x

    def preproc_brain(x):
        x = select_hipp(x)
        x = crop_brain(x)
        return x

    def listfiles(folder):
        for root, folders, files in os.walk(folder):
            for filename in folders + files:
                yield os.path.join(root, filename)

    def gen_filename_pairs(data_dir, v_re, l_re):
        unfiltered_filelist = list(listfiles(data_dir))
        input_list = [item for item in unfiltered_filelist if re.search(v_re, item)]
        label_list = [item for item in unfiltered_filelist if re.search(l_re, item)]
        print("input_list size: ", len(input_list))
        print("label_list size: ", len(label_list))
        if len(input_list) != len(label_list):
            raise Exception("input_list size and label_list size don't match")
        sample_img = nib.load(input_list[0]).get_fdata()
        return (zip(input_list, label_list), sample_img)

    # parse args - UNCOMMENTED IN ORIGINAL CODE, NOW PASSED AS PARAMS
    # data_dir = sys.argv[1]
    # v_regex = sys.argv[2]
    # l_regex = sys.argv[3]
    # outfile = sys.argv[4]
    # print("data_dir: ", data_dir)
    # print("v_regex: ", v_regex)
    # print("l_regex: ", l_regex)
    # print("outfile: ", outfile)

    # Generate a list of (volume_filename, label_filename) tuples
    filename_pairs, sample_img = gen_filename_pairs(data_dir, v_regex, l_regex)

    # To compare original to reconstructed images
    #original_images = []

    writer = tf.io.TFRecordWriter(outfile)
    for v_filename, l_filename in filename_pairs:

        print("Processing:")
        print("  volume: ", v_filename)
        print("  label:  ", l_filename)

        # The volume, in nifti format
        v_nii = nib.load(v_filename)
        # The volume, in numpy format
        v_np = v_nii.get_fdata().astype('int16')
        # Crop the volume to the hippocampus region
        v_np = crop_brain(v_np)
        # The volume, in raw byte format
        v_raw = v_np.tobytes()

        # The label, in nifti format
        l_nii = nib.load(l_filename)
        # The label, in numpy format
        l_np = l_nii.get_fdata().astype('int16')
        # Preprocess the label volume (binarize the hippocampus, then crop)
        l_np = preproc_brain(l_np)
        # The label, in raw byte format
        l_raw = l_np.tobytes()

        # Dimensions
        x_dim = v_np.shape[0]
        y_dim = v_np.shape[1]
        z_dim = v_np.shape[2]
        print("DIMS: " + str(x_dim) + " " + str(y_dim) + " " + str(z_dim))

        data_point = tf.train.Example(features=tf.train.Features(feature={
            'image_raw': _bytes_feature(v_raw),
            'label_raw': _bytes_feature(l_raw)}))

        writer.write(data_point.SerializeToString())

    writer.close()
    # return the shape the model will train on (the cropped volume, not the full image)
    return crop_brain(sample_img).shape
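
As committed, _int64_feature is defined but never used, and the volume dimensions are printed rather than persisted. A hypothetical extension (not in this commit) would store them in each Example so readers can reshape without hard-coding the crop size:

data_point = tf.train.Example(features=tf.train.Features(feature={
    'image_raw': _bytes_feature(v_raw),
    'label_raw': _bytes_feature(l_raw),
    'x_dim': _int64_feature(x_dim),
    'y_dim': _int64_feature(y_dim),
    'z_dim': _int64_feature(z_dim)}))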