Skip to content

Commit 14eee1a

Browse files
Release Frogfish
1 parent f8cac10 commit 14eee1a

File tree

160 files changed

+4484
-1145
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

160 files changed

+4484
-1145
lines changed

.gitignore

-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
bazel-*
22
out/
3-
WORKSPACE

BUILD

+7
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,13 @@ exports_files([
2525
"LICENSE",
2626
])
2727

28+
config_setting(
29+
name = "opt",
30+
values = {
31+
"compilation_mode": "opt",
32+
},
33+
)
34+
2835
# If --define darwinn_portable=1, compile without google3 deps.
2936
config_setting(
3037
name = "darwinn_portable",

Makefile

+19-20
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,15 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
SHELL := /bin/bash
15+
PYTHON3 ?= python3
1516
MAKEFILE_DIR := $(realpath $(dir $(lastword $(MAKEFILE_LIST))))
1617
OUT_DIR := $(MAKEFILE_DIR)/out
1718
OS := $(shell uname -s)
1819

1920
ifeq ($(OS),Linux)
2021
CPU ?= k8
21-
WORKSPACE_PLATFORM_FILE := WORKSPACE.linux
2222
else ifeq ($(OS),Darwin)
2323
CPU ?= darwin
24-
WORKSPACE_PLATFORM_FILE := WORKSPACE.darwin
2524
else
2625
$(error $(OS) is not supported)
2726
endif
@@ -40,23 +39,16 @@ BAZEL_OUT_DIR := $(MAKEFILE_DIR)/bazel-out/$(CPU)-$(COMPILATION_MODE)/bin
4039
# Linux-specific parameters
4140
BAZEL_BUILD_TARGET_Linux := //tflite/public:libedgetpu_direct_all.so
4241
BAZEL_BUILD_FLAGS_Linux := --crosstool_top=@crosstool//:toolchains \
43-
--compiler=gcc \
44-
--linkopt=-l:libusb-1.0.so
42+
--compiler=gcc
4543
BAZEL_BUILD_OUTPUT_FILE_Linux := libedgetpu.so.1.0
4644
BAZEL_BUILD_OUTPUT_SYMLINK_Linux := libedgetpu.so.1
4745

48-
ifeq ($(COMPILATION_MODE), opt)
49-
BAZEL_BUILD_FLAGS_Linux += --linkopt=-Wl,--strip-all
50-
endif
5146
ifeq ($(CPU), armv6)
5247
BAZEL_BUILD_FLAGS_Linux += --linkopt=-L/usr/lib/arm-linux-gnueabihf/
5348
endif
5449

5550
# Darwin-specific parameters
5651
BAZEL_BUILD_TARGET_Darwin := //tflite/public:libedgetpu_direct_usb.dylib
57-
BAZEL_BUILD_FLAGS_Darwin := --linkopt=-L/opt/local/lib \
58-
--linkopt=-lusb-1.0 \
59-
--copt=-fvisibility=hidden
6052
BAZEL_BUILD_OUTPUT_FILE_Darwin := libedgetpu.1.0.dylib
6153
BAZEL_BUILD_OUTPUT_SYMLINK_Darwin := libedgetpu.1.dylib
6254

@@ -65,11 +57,10 @@ BAZEL_BUILD_FLAGS := --sandbox_debug --subcommands \
6557
--experimental_repo_remote_exec \
6658
--compilation_mode=$(COMPILATION_MODE) \
6759
--define darwinn_portable=1 \
68-
--copt=-DSTRIP_LOG=1 \
69-
--copt=-fno-rtti \
70-
--copt=-fno-exceptions \
71-
--copt='-D__FILE__=""' \
72-
--cpu=$(CPU)
60+
--action_env PYTHON_BIN_PATH=$(shell which $(PYTHON3)) \
61+
--cpu=$(CPU) \
62+
--embed_label='TENSORFLOW_COMMIT=$(shell grep "TENSORFLOW_COMMIT =" $(MAKEFILE_DIR)/workspace.bzl | grep -o '[0-9a-f]\{40\}')' \
63+
--stamp
7364
BAZEL_BUILD_FLAGS += $(BAZEL_BUILD_FLAGS_$(OS))
7465
BAZEL_BUILD_TARGET := $(BAZEL_BUILD_TARGET_$(OS))
7566
BAZEL_BUILD_OUTPUT_FILE := $(BAZEL_BUILD_OUTPUT_FILE_$(OS))
@@ -95,23 +86,31 @@ endif
9586
.PHONY: libedgetpu \
9687
libedgetpu-direct \
9788
libedgetpu-throttled \
98-
workspace \
89+
deb \
90+
deb-armhf \
91+
deb-arm64 \
9992
clean
10093

10194
libedgetpu: libedgetpu-direct libedgetpu-throttled
10295

103-
libedgetpu-direct: workspace
96+
libedgetpu-direct:
10497
bazel build $(BAZEL_BUILD_FLAGS) $(BAZEL_BUILD_TARGET)
10598
$(call copy_out,direct)
10699
$(call strip_out,direct)
107100

108-
libedgetpu-throttled: workspace
101+
libedgetpu-throttled:
109102
bazel build $(BAZEL_BUILD_FLAGS) --copt=-DTHROTTLE_EDGE_TPU $(BAZEL_BUILD_TARGET)
110103
$(call copy_out,throttled)
111104
$(call strip_out,throttled)
112105

113-
workspace: bazel/WORKSPACE bazel/$(WORKSPACE_PLATFORM_FILE)
114-
cat $^ > WORKSPACE
106+
deb:
107+
dpkg-buildpackage -rfakeroot -us -uc -tc -b
108+
109+
deb-armhf:
110+
dpkg-buildpackage -rfakeroot -us -uc -tc -b -a armhf -d
111+
112+
deb-arm64:
113+
dpkg-buildpackage -rfakeroot -us -uc -tc -b -a arm64 -d
115114

116115
clean:
117116
rm -rf $(OUT_DIR)

WORKSPACE

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
workspace(name = "libedgetpu")
15+
16+
load(":workspace.bzl", "libedgetpu_dependencies")
17+
18+
libedgetpu_dependencies()
19+
load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace")
20+
tf_workspace(tf_repo_name = "org_tensorflow")
21+
22+
load("@coral_crosstool//:configure.bzl", "cc_crosstool")
23+
cc_crosstool(name = "crosstool")

api/BUILD

+18-16
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
load("//:libedgetpu_cc_rules.bzl", "libedgetpu_cc_library")
16+
1517
# Description:
1618
# Darwinn API headers
1719
load(
@@ -23,12 +25,12 @@ package(default_visibility = ["//visibility:public"])
2325

2426
licenses(["notice"])
2527

26-
cc_library(
28+
libedgetpu_cc_library(
2729
name = "chip",
2830
hdrs = ["chip.h"],
2931
)
3032

31-
cc_library(
33+
libedgetpu_cc_library(
3234
name = "tensor_util",
3335
srcs = ["tensor_util.cc"],
3436
hdrs = ["tensor_util.h"],
@@ -39,7 +41,7 @@ cc_library(
3941
],
4042
)
4143

42-
cc_library(
44+
libedgetpu_cc_library(
4345
name = "layer_information",
4446
srcs = ["layer_information.cc"],
4547
hdrs = ["layer_information.h"],
@@ -51,7 +53,7 @@ cc_library(
5153
],
5254
)
5355

54-
cc_library(
56+
libedgetpu_cc_library(
5557
name = "request",
5658
hdrs = ["request.h"],
5759
deps = [
@@ -60,7 +62,7 @@ cc_library(
6062
],
6163
)
6264

63-
cc_library(
65+
libedgetpu_cc_library(
6466
name = "driver",
6567
hdrs = ["driver.h"],
6668
deps = [
@@ -73,7 +75,7 @@ cc_library(
7375
],
7476
)
7577

76-
cc_library(
78+
libedgetpu_cc_library(
7779
name = "driver_options_helper",
7880
srcs = ["driver_options_helper.cc"],
7981
hdrs = ["driver_options_helper.h"],
@@ -83,20 +85,20 @@ cc_library(
8385
],
8486
)
8587

86-
cc_library(
88+
libedgetpu_cc_library(
8789
name = "allocated_buffer",
8890
srcs = ["allocated_buffer.cc"],
8991
hdrs = ["allocated_buffer.h"],
9092
deps = ["//port"],
9193
)
9294

93-
cc_library(
95+
libedgetpu_cc_library(
9496
name = "dram_buffer",
9597
hdrs = ["dram_buffer.h"],
9698
deps = ["//port"],
9799
)
98100

99-
cc_library(
101+
libedgetpu_cc_library(
100102
name = "buffer",
101103
srcs = ["buffer.cc"],
102104
hdrs = ["buffer.h"],
@@ -107,7 +109,7 @@ cc_library(
107109
],
108110
)
109111

110-
cc_library(
112+
libedgetpu_cc_library(
111113
name = "driver_factory",
112114
srcs = ["driver_factory.cc"],
113115
hdrs = ["driver_factory.h"],
@@ -119,7 +121,7 @@ cc_library(
119121
],
120122
)
121123

122-
cc_library(
124+
libedgetpu_cc_library(
123125
name = "package_reference",
124126
hdrs = ["package_reference.h"],
125127
deps = [
@@ -130,7 +132,7 @@ cc_library(
130132
],
131133
)
132134

133-
cc_library(
135+
libedgetpu_cc_library(
134136
name = "runtime_version",
135137
hdrs = ["runtime_version.h"],
136138
)
@@ -141,15 +143,15 @@ flatbuffer_cc_library(
141143
flatc_args = [""],
142144
)
143145

144-
cc_library(
146+
libedgetpu_cc_library(
145147
name = "timing",
146148
hdrs = ["timing.h"],
147149
deps = [
148150
"//port",
149151
],
150152
)
151153

152-
cc_library(
154+
libedgetpu_cc_library(
153155
name = "watchdog",
154156
srcs = ["watchdog.cc"],
155157
hdrs = ["watchdog.h"],
@@ -161,7 +163,7 @@ cc_library(
161163
],
162164
)
163165

164-
cc_library(
166+
libedgetpu_cc_library(
165167
name = "telemeter_interface",
166168
hdrs = [
167169
"telemeter_interface.h",
@@ -171,7 +173,7 @@ cc_library(
171173
],
172174
)
173175

174-
cc_library(
176+
libedgetpu_cc_library(
175177
name = "execution_context_interface",
176178
hdrs = [
177179
"execution_context_interface.h",

api/driver.h

+1-2
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,7 @@ class Driver {
8585
// integer values may be different from those in NNAPI or other APIs. The
8686
// values here are defined in the order of priority when there are multiple
8787
// models requesting different preferences (e.g. sustained speed takes
88-
// priority over low power). For more information, please see:
89-
// http://go/noronha-execution-preference
88+
// priority over low power).
9089
enum class ExecutionPreference {
9190
// Run at the absolute maximum performance.
9291
kSingleFastAnswer = 0,

api/layer_information.cc

+35-25
Original file line numberDiff line numberDiff line change
@@ -203,30 +203,41 @@ bool OutputLayerInformation::NeedsRelayout() const {
203203
// TODO Add unit tests for this method.
204204
util::Status OutputLayerInformation::Relayout(unsigned char* dest,
205205
const unsigned char* src) const {
206-
// TODO: re-use the same buffer and avoid an unnecessary memcopy
207-
// when relayout is not needed.
208-
if (!NeedsRelayout()) {
209-
memcpy(dest, src,
210-
batch_dim() * y_dim() * x_dim() * z_dim() * DataTypeSize());
211-
return util::OkStatus();
212-
}
213-
214-
if (output_layer_->shape_info()) {
215-
// If output shape info exists in the executable, use the new re-layout
216-
// function. Currently, this is only enabled for models with multiple
217-
// batches.
218-
return RelayoutWithShapeInformation(dest, src);
219-
}
220-
221206
const auto data_type_size = DataTypeSize();
222207
const int z_bytes = z_dim() * data_type_size;
208+
const int executions = execution_count_per_inference();
209+
210+
if (executions == 1) {
211+
// Handle case when execution count is equal to 1, since if execution count
212+
// is greater than 1, there might be padding data in-between.
213+
214+
// TODO: re-use the same buffer and avoid an unnecessary
215+
// memcopy when relayout is not needed.
216+
if (!NeedsRelayout()) {
217+
memcpy(dest, src, batch_dim() * y_dim() * x_dim() * z_bytes);
218+
return util::OkStatus();
219+
}
220+
221+
if (output_layer_->shape_info()) {
222+
// If output shape info exists in the executable, use the new re-layout
223+
// function. Currently, this is only enabled for models with multiple
224+
// batches.
225+
return RelayoutWithShapeInformation(dest, src);
226+
}
227+
} else if (PaddedSizeBytes() == ActualSizeBytes() && !NeedsRelayout()) {
228+
// Use memcpy if `executions` is greater than 1 and there is no internal
229+
// padding between iterations.
230+
if (dest != src) {
231+
memcpy(dest, src, ActualSizeBytes());
232+
}
233+
return util::OkStatus();
234+
}
223235

224236
if (y_dim() == 1 && x_dim() == 1) {
225237
// One dimensional output (only z-dimension).
226238
if (src != dest) {
227239
const int padded_size_bytes = PaddedSizeBytes();
228240
const int actual_size_bytes = ActualSizeBytes();
229-
const int executions = execution_count_per_inference();
230241
if (executions == 1 || padded_size_bytes == actual_size_bytes) {
231242
memcpy(dest, src, z_bytes * executions);
232243
} else {
@@ -276,8 +287,7 @@ util::Status OutputLayerInformation::Relayout(unsigned char* dest,
276287
// provided we have a guaranteed way of ensuring this function would be inlined
277288
// so that the compiler optimizations based on compile-time-constants can kick
278289
// in.
279-
#define RELAYOUT_WITH_Z_BYTES_SPECIALIZATION( \
280-
num_z_bytes, num_z_bytes_padded) \
290+
#define RELAYOUT_WITH_Z_BYTES_SPECIALIZATION(num_z_bytes, num_z_bytes_padded) \
281291
do { \
282292
for (int y = 0; y < y_dim(); ++y) { \
283293
const auto y_buffer_index = GetYBufferIndex(y); \
@@ -330,6 +340,12 @@ util::Status OutputLayerInformation::Relayout(unsigned char* dest,
330340
active_tile_x_sizes.size() > 1 || first_y_tile != last_y_tile;
331341

332342
if (need_relayout) {
343+
// TODO: If iteration count is more than 1, we need to make
344+
// sure we advance 'src' and 'dest' correctly due to padding issue. We
345+
// don't have test case now.
346+
CHECK_EQ(executions, 1)
347+
<< "Verification is missing if execution count is greater than 1";
348+
333349
// If there's no z padding, copy one xz block on one tile at a time.
334350
for (int y = 0; y < y_dim(); ++y) {
335351
const auto y_buffer_index = GetYBufferIndex(y);
@@ -347,17 +363,11 @@ util::Status OutputLayerInformation::Relayout(unsigned char* dest,
347363
}
348364
} else {
349365
// TODO: avoid copy and assign in caller directly.
350-
memcpy(dest, src, x_dim() * y_dim() * z_bytes);
366+
memcpy(dest, src, x_dim() * y_dim() * z_bytes * executions);
351367
}
352368
}
353369

354370
#undef RELAYOUT_WITH_Z_BYTES_SPECIALIZATION
355-
356-
// TODO: If iteration count is more than 1, we need to make sure we
357-
// advance 'src' and 'dest' correctly due to padding issue. We don't have
358-
// test case now.
359-
CHECK_EQ(execution_count_per_inference(), 1)
360-
<< "Verification is missing if execution count is greater than 1";
361371
}
362372

363373
return util::OkStatus();

0 commit comments

Comments
 (0)