planetwars.dev/planetwars-matchrunner/src/docker_runner.rs

238 lines
7.4 KiB
Rust
Raw Normal View History

2022-01-22 13:32:43 +00:00
use std::io;
use std::path::PathBuf;
use std::pin::Pin;
use std::sync::{Arc, Mutex};
use async_trait::async_trait;
use bollard::container::{self, AttachContainerOptions, AttachContainerResults, LogOutput};
use bollard::Docker;
use bytes::Bytes;
use futures::{Stream, StreamExt};
use tokio::io::{AsyncWrite, AsyncWriteExt};
use tokio::sync::mpsc;
use tokio::time::timeout;
use crate::match_context::{EventBus, PlayerHandle, RequestError, RequestMessage};
2022-02-23 20:08:56 +00:00
use crate::match_log::{MatchLogMessage, MatchLogger, StdErrMessage};
2022-01-22 13:32:43 +00:00
use crate::BotSpec;
/// Describes how to launch a bot inside a Docker container.
#[derive(Debug, Clone)]
pub struct DockerBotSpec {
    /// Name of the Docker image the container is created from.
    pub image: String,
    /// Host directory holding the bot's code; bind-mounted into the
    /// container at `/workdir`.
    pub code_path: PathBuf,
    /// Command (and arguments) executed inside the container.
    pub argv: Vec<String>,
}
#[async_trait]
impl BotSpec for DockerBotSpec {
async fn run_bot(
&self,
player_id: u32,
event_bus: Arc<Mutex<EventBus>>,
2022-02-23 20:08:56 +00:00
match_logger: MatchLogger,
2022-01-22 13:32:43 +00:00
) -> Box<dyn PlayerHandle> {
let process = spawn_docker_process(self).await.unwrap();
2022-02-23 20:08:56 +00:00
let (handle, runner) = create_docker_bot(process, player_id, event_bus, match_logger);
tokio::spawn(runner.run());
2022-01-22 13:32:43 +00:00
return Box::new(handle);
}
}
async fn spawn_docker_process(
params: &DockerBotSpec,
) -> Result<ContainerProcess, bollard::errors::Error> {
let docker = Docker::connect_with_socket_defaults()?;
let bot_code_dir = std::fs::canonicalize(&params.code_path).unwrap();
let code_dir_str = bot_code_dir.as_os_str().to_str().unwrap();
2022-05-21 14:44:58 +00:00
let memory_limit = 512 * 1024 * 1024; // 512MB
2022-01-22 13:32:43 +00:00
let config = container::Config {
image: Some(params.image.clone()),
host_config: Some(bollard::models::HostConfig {
binds: Some(vec![format!("{}:{}", code_dir_str, "/workdir")]),
2022-05-21 14:44:58 +00:00
network_mode: Some("none".to_string()),
memory: Some(memory_limit),
memory_swap: Some(memory_limit),
// TODO: this applies a limit to how much cpu one bot can use.
// when running multiple bots concurrently though, the server
// could still become resource-starved.
cpu_period: Some(100_000),
cpu_quota: Some(10_000),
2022-01-22 13:32:43 +00:00
..Default::default()
}),
working_dir: Some("/workdir".to_string()),
cmd: Some(params.argv.clone()),
attach_stdin: Some(true),
attach_stdout: Some(true),
attach_stderr: Some(true),
open_stdin: Some(true),
2022-05-21 14:44:58 +00:00
network_disabled: Some(true),
2022-01-22 13:32:43 +00:00
..Default::default()
};
let response = docker
.create_container::<&str, String>(None, config)
.await?;
let container_id = response.id;
docker
.start_container::<String>(&container_id, None)
.await?;
let AttachContainerResults { output, input } = docker
.attach_container(
&container_id,
Some(AttachContainerOptions::<String> {
stdout: Some(true),
stderr: Some(true),
stdin: Some(true),
stream: Some(true),
2022-02-23 20:08:56 +00:00
// setting this to true causes duplicate error output. Why?
logs: Some(false),
2022-01-22 13:32:43 +00:00
..Default::default()
}),
)
.await?;
Ok(ContainerProcess {
docker,
container_id,
2022-01-22 13:32:43 +00:00
stdin: input,
output,
})
}
2022-02-23 20:08:56 +00:00
struct ContainerProcess {
docker: Docker,
container_id: String,
2022-01-22 13:32:43 +00:00
stdin: Pin<Box<dyn AsyncWrite + Send>>,
output: Pin<Box<dyn Stream<Item = Result<LogOutput, bollard::errors::Error>> + Send>>,
}
impl ContainerProcess {
    /// Force-removes the container.
    ///
    /// # Errors
    /// Returns the Docker API error when the removal request fails.
    //
    // NOTE(review): `&mut self` is deliberate. The streams held by this
    // struct are `Send` but not `Sync`, so a future borrowing `&self` would
    // presumably not be `Send` and could not run on a spawned task.
    async fn terminate(&mut self) -> Result<(), bollard::errors::Error> {
        let options = bollard::container::RemoveContainerOptions {
            force: true,
            ..Default::default()
        };
        self.docker
            .remove_container(&self.container_id, Some(options))
            .await
    }
}
2022-01-22 13:32:43 +00:00
fn create_docker_bot(
2022-02-23 20:08:56 +00:00
process: ContainerProcess,
2022-01-22 13:32:43 +00:00
player_id: u32,
event_bus: Arc<Mutex<EventBus>>,
2022-02-23 20:08:56 +00:00
match_logger: MatchLogger,
2022-01-22 13:32:43 +00:00
) -> (DockerBotHandle, DockerBotRunner) {
let (tx, rx) = mpsc::unbounded_channel();
let bot_handle = DockerBotHandle { tx };
let bot_runner = DockerBotRunner {
2022-02-23 20:08:56 +00:00
process,
2022-01-22 13:32:43 +00:00
player_id,
event_bus,
2022-02-23 20:08:56 +00:00
match_logger,
2022-01-22 13:32:43 +00:00
rx,
};
(bot_handle, bot_runner)
}
/// Request-submitting side of a docker bot.
pub struct DockerBotHandle {
    /// Sending end of the channel on which the runner receives requests.
    tx: mpsc::UnboundedSender<RequestMessage>,
}
impl PlayerHandle for DockerBotHandle {
    /// Queues a request for the bot's runner task.
    ///
    /// # Panics
    /// Panics when the receiving end of the channel is gone, i.e. the runner
    /// no longer exists to serve the request.
    fn send_request(&mut self, r: RequestMessage) {
        self.tx
            .send(r)
            .expect("failed to send message to docker bot");
    }
}
pub struct DockerBotRunner {
2022-02-23 20:08:56 +00:00
process: ContainerProcess,
2022-01-22 13:32:43 +00:00
event_bus: Arc<Mutex<EventBus>>,
rx: mpsc::UnboundedReceiver<RequestMessage>,
2022-02-23 20:08:56 +00:00
match_logger: MatchLogger,
2022-01-22 13:32:43 +00:00
player_id: u32,
}
impl DockerBotRunner {
2022-02-23 20:08:56 +00:00
pub async fn run(mut self) {
2022-01-22 13:32:43 +00:00
while let Some(request) = self.rx.recv().await {
2022-02-23 20:08:56 +00:00
let resp_fut = self.communicate(&request.content);
let result = timeout(request.timeout, resp_fut).await;
let request_response = match result {
Ok(Ok(response)) => Ok(response.to_vec()),
// this one happens when a bot output stream ends, map this to Timeout for now
Ok(Err(_read_error)) => Err(RequestError::Timeout),
2022-01-22 13:32:43 +00:00
Err(_elapsed) => Err(RequestError::Timeout),
};
let request_id = (self.player_id, request.request_id);
self.event_bus
.lock()
.unwrap()
2022-02-23 20:08:56 +00:00
.resolve_request(request_id, request_response);
2022-01-22 13:32:43 +00:00
}
self.process
.terminate()
.await
.expect("could not terminate process");
2022-01-22 13:32:43 +00:00
}
2022-02-23 20:08:56 +00:00
pub async fn communicate(&mut self, input: &[u8]) -> io::Result<Bytes> {
self.write_line(input).await?;
self.read_line().await
}
async fn write_line(&mut self, bytes: &[u8]) -> io::Result<()> {
self.process.stdin.write_all(bytes).await?;
self.process.stdin.write_u8(b'\n').await?;
self.process.stdin.flush().await?;
Ok(())
}
async fn read_line(&mut self) -> io::Result<Bytes> {
while let Some(item) = self.process.output.next().await {
let log_output = item.expect("failed to get log output");
match log_output {
LogOutput::StdOut { message } => {
// TODO: this is not correct (buffering and such)
return Ok(message);
}
LogOutput::StdErr { mut message } => {
// TODO
if message.ends_with(b"\n") {
message.truncate(message.len() - 1);
}
for line in message.split(|c| *c == b'\n') {
let message = StdErrMessage {
player_id: self.player_id,
message: String::from_utf8_lossy(line).to_string(),
};
self.match_logger
.send(MatchLogMessage::StdErr(message))
.unwrap();
}
}
_ => (),
}
}
// at this point the stream has ended
// does this mean the container has exited?
Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"no response received",
))
}
2022-01-22 13:32:43 +00:00
}