FLAMESlurmBackend (flame_slurm_backend v0.0.3-rc.0)
Slurm Backend implementation.
Usage
Configure the FLAME backend in your configuration or application setup.
First, write a jobscript:
#!/bin/bash
#SBATCH -o flame.%j.out
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --time=01:00:00
#SBATCH --mem=20G
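# Export the compute node's InfiniBand (ib0) IP; it is used as the host part of the runner's Erlang node name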
export SLURM_FLAME_HOST=$(ip -f inet addr show ib0 | awk '/inet/ {print $2}' | cut -d/ -f1)
To run on a partition with GPU instances and one GPU per job, add:
#SBATCH --partition=gpu
#SBATCH --gpus-per-node=1
# application.ex
children = [
  {FLAME.Pool,
   name: MyApp.SamplePool,
   code_sync: [
     start_apps: true,
     sync_beams: Kino.beam_paths(),
     compress: false,
     extract_dir: {
       FLAMESlurmBackend.SlurmClient,
       :path_job_id,
       [Path.absname("extract_dir") <> "/"]
     }
   ],
   min: 0,
   max: 1,
   max_concurrency: 1,
   idle_shutdown_after: :timer.minutes(10),
   timeout: :infinity,
   boot_timeout: 360_000,
   track_resources: true,
   backend: {
     FLAMESlurmBackend,
     slurm_job: <jobscript>
   }}
]
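With the pool in your supervision tree, work is dispatched to a Slurm-allocated runner through the regular FLAME API. A minimal sketch against the MyApp.SamplePool defined above (the function body is only illustrative):

# Runs the anonymous function on a remote runner booted via the jobscript
# and sends the result back to the caller.
FLAME.call(MyApp.SamplePool, fn ->
  node()
end)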
When running inside a Livebook:
Start Livebook with the host part of its Erlang long name matching SLURM_FLAME_HOST. Using LIVEBOOK_IP=0.0.0.0 is helpful if you set up port forwarding from the cluster's login host.
#!/bin/bash
export SLURM_FLAME_HOST=$(ip -f inet addr show ib0 | awk '/inet/ {print $2}' | cut -d/ -f1)
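# start the Erlang Port Mapper Daemon so distributed nodes can register and connect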
epmd -daemon
LIVEBOOK_IP=0.0.0.0 livebook server --name livebook@$SLURM_FLAME_HOST
Kino.start_child(
  {FLAME.Pool,
   name: :runner,
   code_sync: [start_apps: true, sync_beams: Kino.beam_paths(), compress: false],
   min: 0,
   max: 1,
   max_concurrency: 10,
   idle_shutdown_after: :timer.minutes(1),
   timeout: :infinity,
   track_resources: true,
   backend: {FLAMESlurmBackend, slurm_job: <jobscript>}}
)
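Subsequent Livebook cells can then offload work the same way; a minimal sketch using the :runner pool started above:

# Evaluates on the Slurm-allocated runner and returns its node name.
FLAME.call(:runner, fn -> node() end)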