谷歌Pixel 2人像模式代码曝光,你看懂了吗?

猿友 2018-03-16 18:34:11 浏览数 (5989)

谷歌将其所使用的 AI 图像分层算法 DeepLab-v3+ 开源,让第三方相机 app 也都可以利用这一神经网络。


 import tensorflow as tf
 from deeplab.core import feature_extractor
 slim = tf.contrib.slim
 _LOGITS_SCOPE_NAME = 'logits'
 _MERGED_LOGITS_SCOPE = 'merged_logits'
 _IMAGE_POOLING_SCOPE = 'image_pooling'
 _ASPP_SCOPE = 'aspp'
 _CONCAT_PROJECTION_SCOPE = 'concat_projection'
 _DECODER_SCOPE = 'decoder'
 def get_extra_layer_scopes():
 """Gets the scopes for extra layers.
 A list of scopes for extra layers.
 return [
 def predict_labels_multi_scale(images,
 """Predicts segmentation labels.
 images: A tensor of size [batch, height, width, channels].
 model_options: A ModelOptions instance to configure models.
 eval_scales: The scales to resize images for evaluation.
 add_flipped_images: Add flipped images for evaluation or not.
 A dictionary with keys specifying the output_type (e.g., semantic
 prediction) and values storing Tensors representing predictions (argmax
 over channels). Each prediction has size [batch, height, width].
 outputs_to_predictions = {
 output: []
 for output in model_options.outputs_to_num_classes
 for i, image_scale in enumerate(eval_scales):
 with tf.variable_scope(tf.get_variable_scope(), reuse=True if i else None):
 outputs_to_scales_to_logits = multi_scale_logits(
 if add_flipped_images:
 with tf.variable_scope(tf.get_variable_scope(), reuse=True):
 outputs_to_scales_to_logits_reversed = multi_scale_logits(
 tf.reverse_v2(images, [2]),
 for output in sorted(outputs_to_scales_to_logits):
 scales_to_logits = outputs_to_scales_to_logits[output]
 logits = tf.image.resize_bilinear(
 tf.expand_dims(tf.nn.softmax(logits), 4))
 if add_flipped_images:
 scales_to_logits_reversed = (
 logits_reversed = tf.image.resize_bilinear(
 tf.reverse_v2(scales_to_logits_reversed[_MERGED_LOGITS_SCOPE], [2]),
 tf.expand_dims(tf.nn.softmax(logits_reversed), 4))
 for output in sorted(outputs_to_predictions):
 predictions = outputs_to_predictions[output]
 # Compute average prediction across different scales and flipped images.
 predictions = tf.reduce_mean(tf.concat(predictions, 4), axis=4)
 outputs_to_predictions[output] = tf.argmax(predictions, 3)
 return outputs_to_predictions
 def predict_labels(images, model_options, image_pyramid=None):
 """Predicts segmentation labels.
 images: A tensor of size [batch, height, width, channels].
 model_options: A ModelOptions instance to configure models.
 image_pyramid: Input image scales for multi-scale feature extraction.
 A dictionary with keys specifying the output_type (e.g., semantic
 prediction) and values storing Tensors representing predictions (argmax
 over channels). Each prediction has size [batch, height, width].
 outputs_to_scales_to_logits = multi_scale_logits(
 predictions = {}
 for output in sorted(outputs_to_scales_to_logits):
 scales_to_logits = outputs_to_scales_to_logits[output]
 logits = tf.image.resize_bilinear(
 predictions[output] = tf.argmax(logits, 3)
 return predictions
 def scale_dimension(dim, scale):
 """Scales the input dimension.
 dim: Input dimension (a scalar or a scalar Tensor).
 scale: The amount of scaling applied to the input.
 Scaled dimension.
 if isinstance(dim, tf.Tensor):
 return tf.cast((tf.to_float(dim) - 1.0) scale + 1.0, dtype=tf.int32)
 return int((float(dim) - 1.0)
scale + 1.0)
 def multi_scale_logits(images,
 """Gets the logits for multi-scale inputs.
 The returned logits are all downsampled (due to max-pooling layers)
 for both training and evaluation.

0 人点赞