planetwars.dev/planetwars-matchrunner/src/docker_runner.rs

277 lines
8.4 KiB
Rust
Raw Normal View History

2022-01-22 14:32:43 +01:00
use std::io;
use std::pin::Pin;
use std::sync::{Arc, Mutex};
use async_trait::async_trait;
use bollard::container::{self, AttachContainerOptions, AttachContainerResults, LogOutput};
use bollard::Docker;
use bytes::Bytes;
use futures::{Stream, StreamExt};
use tokio::io::{AsyncWrite, AsyncWriteExt};
use tokio::sync::mpsc;
use tokio::task::JoinHandle;
2022-01-22 14:32:43 +01:00
use tokio::time::timeout;
use crate::match_context::{EventBus, PlayerHandle, RequestError, RequestMessage};
2022-02-23 21:08:56 +01:00
use crate::match_log::{MatchLogMessage, MatchLogger, StdErrMessage};
2022-01-22 14:32:43 +01:00
use crate::BotSpec;
2022-07-24 23:08:51 +02:00
// TODO: this API needs a better design with respect to pulling
// and general container management
2022-01-22 14:32:43 +01:00
#[derive(Clone, Debug)]
pub struct DockerBotSpec {
pub image: String,
2022-07-11 20:43:10 +02:00
pub binds: Option<Vec<String>>,
pub argv: Option<Vec<String>>,
pub working_dir: Option<String>,
2022-07-24 23:08:51 +02:00
pub pull: bool,
pub credentials: Option<Credentials>,
}
/// Username/password pair used to authenticate against an image registry
/// when pulling a bot image.
///
/// `Debug` is implemented by hand so that the password is never written to
/// logs or panic messages; the username is still shown.
#[derive(Clone)]
pub struct Credentials {
    /// Registry account name.
    pub username: String,
    /// Registry account secret. Redacted in `Debug` output.
    pub password: String,
}

impl std::fmt::Debug for Credentials {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Credentials")
            .field("username", &self.username)
            // Never print the secret itself, only a placeholder.
            .field("password", &"<redacted>")
            .finish()
    }
}
#[async_trait]
impl BotSpec for DockerBotSpec {
async fn run_bot(
&self,
player_id: u32,
event_bus: Arc<Mutex<EventBus>>,
2022-02-23 21:08:56 +01:00
match_logger: MatchLogger,
2022-01-22 14:32:43 +01:00
) -> Box<dyn PlayerHandle> {
let process = spawn_docker_process(self).await.unwrap();
let handle = run_docker_bot(process, player_id, event_bus, match_logger);
2022-01-22 14:32:43 +01:00
return Box::new(handle);
}
}
async fn spawn_docker_process(
params: &DockerBotSpec,
) -> Result<ContainerProcess, bollard::errors::Error> {
let docker = Docker::connect_with_socket_defaults()?;
2022-07-24 23:08:51 +02:00
if params.pull {
let mut create_image_stream = docker.create_image(
Some(bollard::image::CreateImageOptions {
from_image: params.image.as_str(),
..Default::default()
}),
None,
params
.credentials
.as_ref()
.map(|credentials| bollard::auth::DockerCredentials {
username: Some(credentials.username.clone()),
password: Some(credentials.password.clone()),
..Default::default()
}),
);
while let Some(item) = create_image_stream.next().await {
// just consume the stream for now,
// and make noise when something breaks
let _info = item.expect("hit error in docker pull");
}
}
2022-05-21 16:44:58 +02:00
let memory_limit = 512 * 1024 * 1024; // 512MB
2022-01-22 14:32:43 +01:00
let config = container::Config {
image: Some(params.image.clone()),
host_config: Some(bollard::models::HostConfig {
2022-07-11 20:43:10 +02:00
binds: params.binds.clone(),
2022-05-21 16:44:58 +02:00
network_mode: Some("none".to_string()),
memory: Some(memory_limit),
memory_swap: Some(memory_limit),
2022-06-04 17:21:50 +02:00
// TODO: this seems to have caused weird delays when executing bots
// on the production server. A solution should still be found, though.
// cpu_period: Some(100_000),
// cpu_quota: Some(10_000),
2022-01-22 14:32:43 +01:00
..Default::default()
}),
2022-07-11 20:43:10 +02:00
working_dir: params.working_dir.clone(),
cmd: params.argv.clone(),
2022-01-22 14:32:43 +01:00
attach_stdin: Some(true),
attach_stdout: Some(true),
attach_stderr: Some(true),
open_stdin: Some(true),
2022-05-21 16:44:58 +02:00
network_disabled: Some(true),
2022-01-22 14:32:43 +01:00
..Default::default()
};
let response = docker
.create_container::<&str, String>(None, config)
.await?;
let container_id = response.id;
docker
.start_container::<String>(&container_id, None)
.await?;
let AttachContainerResults { output, input } = docker
.attach_container(
&container_id,
Some(AttachContainerOptions::<String> {
stdout: Some(true),
stderr: Some(true),
stdin: Some(true),
stream: Some(true),
2022-02-23 21:08:56 +01:00
// setting this to true causes duplicate error output. Why?
logs: Some(false),
2022-01-22 14:32:43 +01:00
..Default::default()
}),
)
.await?;
Ok(ContainerProcess {
docker,
container_id,
2022-01-22 14:32:43 +01:00
stdin: input,
output,
})
}
2022-02-23 21:08:56 +01:00
struct ContainerProcess {
docker: Docker,
container_id: String,
2022-01-22 14:32:43 +01:00
stdin: Pin<Box<dyn AsyncWrite + Send>>,
output: Pin<Box<dyn Stream<Item = Result<LogOutput, bollard::errors::Error>> + Send>>,
}
impl ContainerProcess {
    /// Force-removes the container.
    ///
    /// Takes `&mut self` rather than `&self` on purpose: the original note
    /// says this is needed for `terminate().await` to be usable from the
    /// spawned task. Presumably a shared borrow would require
    /// `ContainerProcess: Sync`, which its boxed stream fields do not
    /// promise.
    async fn terminate(&mut self) -> Result<(), bollard::errors::Error> {
        let force_remove = bollard::container::RemoveContainerOptions {
            force: true,
            ..Default::default()
        };
        self.docker
            .remove_container(&self.container_id, Some(force_remove))
            .await
    }
}
fn run_docker_bot(
2022-02-23 21:08:56 +01:00
process: ContainerProcess,
2022-01-22 14:32:43 +01:00
player_id: u32,
event_bus: Arc<Mutex<EventBus>>,
2022-02-23 21:08:56 +01:00
match_logger: MatchLogger,
) -> DockerBotHandle {
2022-01-22 14:32:43 +01:00
let (tx, rx) = mpsc::unbounded_channel();
let bot_runner = DockerBotRunner {
2022-02-23 21:08:56 +01:00
process,
2022-01-22 14:32:43 +01:00
player_id,
event_bus,
2022-02-23 21:08:56 +01:00
match_logger,
2022-01-22 14:32:43 +01:00
rx,
};
let join_handle = tokio::spawn(bot_runner.run());
DockerBotHandle { tx, join_handle }
2022-01-22 14:32:43 +01:00
}
/// Handle to a bot running in a docker container.
pub struct DockerBotHandle {
    /// Sending side of the request channel consumed by the bot's runner task.
    tx: mpsc::UnboundedSender<RequestMessage>,
    /// Join handle of the spawned runner task.
    join_handle: JoinHandle<()>,
}
impl PlayerHandle for DockerBotHandle {
    /// Queues a request for the bot's runner task.
    ///
    /// # Panics
    ///
    /// Panics when the request cannot be queued because the receiving side
    /// of the channel is gone, i.e. the runner task no longer exists.
    fn send_request(&mut self, r: RequestMessage) {
        self.tx
            .send(r)
            .expect("failed to send message to docker bot");
    }

    /// Consumes the handle and yields the join handle of the runner task.
    fn into_join_handle(self: Box<Self>) -> JoinHandle<()> {
        self.join_handle
    }
}
pub struct DockerBotRunner {
2022-02-23 21:08:56 +01:00
process: ContainerProcess,
2022-01-22 14:32:43 +01:00
event_bus: Arc<Mutex<EventBus>>,
rx: mpsc::UnboundedReceiver<RequestMessage>,
2022-02-23 21:08:56 +01:00
match_logger: MatchLogger,
2022-01-22 14:32:43 +01:00
player_id: u32,
}
impl DockerBotRunner {
2022-02-23 21:08:56 +01:00
pub async fn run(mut self) {
2022-01-22 14:32:43 +01:00
while let Some(request) = self.rx.recv().await {
2022-02-23 21:08:56 +01:00
let resp_fut = self.communicate(&request.content);
let result = timeout(request.timeout, resp_fut).await;
let request_response = match result {
Ok(Ok(response)) => Ok(response.to_vec()),
2022-09-24 23:37:23 +02:00
// Read failed.
// TODO: better logging for errors
Ok(Err(_read_error)) => Err(RequestError::BotTerminated),
2022-01-22 14:32:43 +01:00
Err(_elapsed) => Err(RequestError::Timeout),
};
let request_id = (self.player_id, request.request_id);
self.event_bus
.lock()
.unwrap()
2022-02-23 21:08:56 +01:00
.resolve_request(request_id, request_response);
2022-01-22 14:32:43 +01:00
}
self.process
.terminate()
.await
.expect("could not terminate process");
2022-01-22 14:32:43 +01:00
}
2022-02-23 21:08:56 +01:00
pub async fn communicate(&mut self, input: &[u8]) -> io::Result<Bytes> {
self.write_line(input).await?;
self.read_line().await
}
async fn write_line(&mut self, bytes: &[u8]) -> io::Result<()> {
self.process.stdin.write_all(bytes).await?;
self.process.stdin.write_u8(b'\n').await?;
self.process.stdin.flush().await?;
Ok(())
}
async fn read_line(&mut self) -> io::Result<Bytes> {
while let Some(item) = self.process.output.next().await {
let log_output = item.expect("failed to get log output");
match log_output {
LogOutput::StdOut { message } => {
// TODO: this is not correct (buffering and such)
return Ok(message);
}
LogOutput::StdErr { mut message } => {
// TODO
if message.ends_with(b"\n") {
message.truncate(message.len() - 1);
}
for line in message.split(|c| *c == b'\n') {
let message = StdErrMessage {
player_id: self.player_id,
message: String::from_utf8_lossy(line).to_string(),
};
self.match_logger
.send(MatchLogMessage::StdErr(message))
.unwrap();
}
}
_ => (),
}
}
// at this point the stream has ended
// does this mean the container has exited?
Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"no response received",
))
}
2022-01-22 14:32:43 +01:00
}