Evision.DNN Example - Image Classification Task with GoogLeNet
# set `EVISION_PREFER_PRECOMPILED` to `false`
# if you prefer `:evision` to be compiled from source
# note that to compile from source, you may need at least 1GB RAM
# System.put_env("EVISION_PREFER_PRECOMPILED", "false")
Mix.install([
{:evision, "~> 0.1.15"},
{:kino, "~> 0.7"},
{:req, "~> 0.3"}
])
:ok
define-some-helper-functions
Define Some Helper Functions
defmodule Helper do
  @moduledoc """
  Small utilities used by this notebook.
  """

  @doc """
  Downloads `url` with `Req` and writes the response body to `save_as`.

  When `overwrite` is `false` (the default) and `save_as` already exists, nothing
  is downloaded. When `overwrite` is `true`, the file is always fetched and rewritten.

  Returns `:ok`. Raises a `RuntimeError` carrying the inspected response when the
  HTTP status is not 200; errors raised by `Req.get!/2` or `File.write!/2` propagate.
  """
  @spec download!(String.t(), Path.t(), boolean()) :: :ok
  def download!(url, save_as, overwrite \\ false)

  def download!(url, save_as, false) do
    if not File.exists?(save_as) do
      download!(url, save_as, true)
    end

    :ok
  end

  def download!(url, save_as, true) do
    # Body decoding is disabled so the raw response bytes are written to disk.
    # With decoding on, Req may turn e.g. JSON or archive responses into Elixir
    # terms, which `File.write!/2` cannot write as-is.
    body =
      case Req.get!(url, decode_body: false) do
        %Req.Response{status: 200, body: body} ->
          body

        error ->
          raise inspect(error)
      end

    File.write!(save_as, body)
  end
end
{:module, Helper, <<70, 79, 82, 49, 0, 0, 10, ...>>, {:download!, 3}}
download-googlenet-model-and-a-test-image
Download the GoogLeNet Model and a Test Image
- Model parameters.
bvlc_googlenet.caffemodel
- Model config.
bvlc_googlenet.prototxt
- List of class names.
classification_classes_ILSVRC2012.txt
- Test image.
space_shuttle.jpg
# Switch to this file's directory (or any directory you have write permission for)
# so that the relative file names below are created there.
File.cd!(__DIR__)

# {source URL, local file name} pairs, fetched in the order listed.
# `Helper.download!/2` skips any file that already exists locally.
[
  {"http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel",
   "bvlc_googlenet.caffemodel"},
  {"https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/bvlc_googlenet.prototxt",
   "bvlc_googlenet.prototxt"},
  {"https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt",
   "classification_classes_ILSVRC2012.txt"},
  {"https://docs.opencv.org/4.5.4/space_shuttle.jpg", "space_shuttle.jpg"}
]
|> Enum.each(fn {url, save_as} -> Helper.download!(url, save_as) end)
:ok
read-class-names
Read Class Names
# Read the class-name file into a list with one label per line; a label's position
# in the list is used later as its class id.
# `trim: true` drops the empty string that a trailing newline would otherwise
# leave at the end of the list.
classes =
  "classification_classes_ILSVRC2012.txt"
  |> File.read!()
  |> String.split("\n", trim: true)
["tench, Tinca tinca", "goldfish, Carassius auratus",
"great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias",
"tiger shark, Galeocerdo cuvieri", "hammerhead, hammerhead shark",
"electric ray, crampfish, numbfish, torpedo", "stingray", "cock", "hen",
"ostrich, Struthio camelus", "brambling, Fringilla montifringilla",
"goldfinch, Carduelis carduelis", "house finch, linnet, Carpodacus mexicanus", "junco, snowbird",
"indigo bunting, indigo finch, indigo bird, Passerina cyanea",
"robin, American robin, Turdus migratorius", "bulbul", "jay", "magpie", "chickadee",
"water ouzel, dipper", "kite", "bald eagle, American eagle, Haliaeetus leucocephalus", "vulture",
"great grey owl, great gray owl, Strix nebulosa",
"European fire salamander, Salamandra salamandra", "common newt, Triturus vulgaris", "eft",
"spotted salamander, Ambystoma maculatum", "axolotl, mud puppy, Ambystoma mexicanum",
"bullfrog, Rana catesbeiana", "tree frog, tree-frog",
"tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui",
"loggerhead, loggerhead turtle, Caretta caretta",
"leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea", "mud turtle", "terrapin",
"box turtle, box tortoise", "banded gecko", "common iguana, iguana, Iguana iguana",
"American chameleon, anole, Anolis carolinensis", "whiptail, whiptail lizard", "agama",
"frilled lizard, Chlamydosaurus kingi", "alligator lizard", "Gila monster, Heloderma suspectum",
"green lizard, Lacerta viridis", "African chameleon, Chamaeleo chamaeleon",
"Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis",
"African crocodile, Nile crocodile, Crocodylus niloticus", ...]
load-the-googlenet-model
Load the GoogLeNet Model
# `Cv` is the short name used for the Evision namespace in the rest of the notebook.
alias Evision, as: Cv

# Build the network from the downloaded model parameters and its prototxt config.
# NOTE(review): the empty `framework` string presumably leaves format detection
# to OpenCV — confirm against the `readNet` documentation.
model =
  Cv.DNN.readNet(
    "bvlc_googlenet.caffemodel",
    config: "bvlc_googlenet.prototxt",
    framework: ""
  )
%Evision.DNN.Net{ref: #Reference<0.4003430890.1440088085.55917>}
set-backend-and-target
Set Backend and Target
# Backend ids for setPreferableBackend/2:
#   0: automatically (by default)
#   1: Halide language (http://halide-lang.org/)
#   2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit)
#   3: OpenCV implementation
#   4: VKCOM
#   5: CUDA
#
# Target ids for setPreferableTarget/2:
#   0: CPU target (by default)
#   1: OpenCL
#   2: OpenCL fp16 (half-float precision)
#   3: VPU
#   4: Vulkan
#   6: CUDA
#   7: CUDA fp16 (half-float preprocess)
#
# Both are left at 0 here: automatic backend selection on the CPU target.
model =
  model
  |> Cv.DNN.Net.setPreferableBackend(0)
  |> Cv.DNN.Net.setPreferableTarget(0)
%Evision.DNN.Net{ref: #Reference<0.4003430890.1440088085.55917>}
read-the-test-image-and-set-it-as-the-input
Read the Test Image and Set It as the Input
# Load the test image from disk.
mat = Cv.imread("space_shuttle.jpg")

# Turn the image into a 224x224 input blob with the R and B channels swapped.
# NOTE(review): the mean values are negative. OpenCV documents `mean` as values
# that are subtracted from the channels, so this may add rather than subtract
# them — confirm this is intended before changing it.
blob =
  Cv.DNN.blobFromImage(
    mat,
    scalefactor: 1,
    swapRB: true,
    mean: [-104, -117, -123],
    size: [224, 224]
  )

# Attach the blob as the network input; no further scaling or mean shift is applied here.
model =
  Cv.DNN.Net.setInput(
    model,
    blob,
    name: "",
    scalefactor: 1.0,
    mean: [0, 0, 0]
  )
%Evision.DNN.Net{ref: #Reference<0.4003430890.1440088085.55917>}
run-the-forward-function
Run the Forward Function
# Time the forward pass with the monotonic clock. Unlike OS wall-clock time it
# never jumps when the system clock is adjusted, so the difference below is a
# reliable elapsed duration.
start_time = System.monotonic_time(:millisecond)
pred = Cv.DNN.Net.forward(model, outputName: "")
end_time = System.monotonic_time(:millisecond)
"Inference time=>#{end_time - start_time} ms"
"Inference time=>49 ms"
get-the-classification-result
Get the Classification Result
# Convert the network output into an Nx tensor that uses Nx.BinaryBackend.
pred = Cv.Mat.to_nx(pred, Nx.BinaryBackend)

# Index of the largest value across the whole tensor.
pred_class_id =
  pred
  |> Nx.argmax()
  |> Nx.to_flat_list()
  |> List.first()

# Value stored at that index along axis 1.
confidence =
  pred
  |> Nx.take(Nx.tensor(pred_class_id), axis: 1)
  |> Nx.to_flat_list()
  |> List.first()

# Look up the label at the same position in the class list.
class_label = Enum.at(classes, pred_class_id)

"Predict result: #{class_label}=>#{Float.round(confidence * 100, 2)}"
"Predict result: space shuttle=>99.13"