% Journal article on a Docker-based, GPU-enabled JupyterLab in Galaxy
% (GigaScience, volume 12).
% No page range is recorded for this article; `pages` carries the article id,
% which is the suffix of the DOI below.
% `user`, `pmid`, `issn` and `abstract` are bookkeeping fields; styles that do
% not know them ignore them.
% NOTE(review): the citation key and `year` say 2022, but volume 12 of this
% journal may correspond to 2023 -- confirm against the publisher record
% before changing `year`. The key is left untouched so existing citations
% keep resolving.
@Article{Kumar_Cuccuru_Gruning-acces_infra_for-2022,
  author   = {Kumar, Anup and Cuccuru, Gianmauro and
              Gr{\"u}ning, Bj{\"o}rn and Backofen, Rolf},
  title    = {An accessible infrastructure for artificial intelligence
              using a {Docker}-based {JupyterLab} in {Galaxy}},
  journal  = {{GigaScience}},
  year     = {2022},
  volume   = {12},
  pages    = {giad028},
  doi      = {10.1093/gigascience/giad028},
  issn     = {2047-217X},
  pmid     = {37099385},
  user     = {backofen},
  abstract = {BACKGROUND: Artificial intelligence (AI) programs that
              train on large datasets require powerful compute
              infrastructure consisting of several CPU cores and GPUs.
              JupyterLab provides an excellent framework for developing AI
              programs, but it needs to be hosted on such an
              infrastructure to enable faster training of AI programs
              using parallel computing. FINDINGS: An open-source,
              docker-based, and GPU-enabled JupyterLab infrastructure is
              developed that runs on the public compute infrastructure of
              Galaxy Europe consisting of thousands of CPU cores, many
              GPUs, and several petabytes of storage to rapidly prototype
              and develop end-to-end AI projects. Using a JupyterLab
              notebook, long-running AI model training programs can also
              be executed remotely to create trained models, represented
              in open neural network exchange (ONNX) format, and other
              output datasets in Galaxy. Other features include Git
              integration for version control, the option of creating and
              executing pipelines of notebooks, and multiple dashboards
              and packages for monitoring compute resources and
              visualization, respectively. CONCLUSIONS: These features
              make JupyterLab in Galaxy Europe highly suitable for
              creating and managing AI projects. A recent scientific
              publication that predicts infected regions in COVID-19
              computed tomography scan images is reproduced using various
              features of JupyterLab on Galaxy Europe. In addition,
              ColabFold, a faster implementation of AlphaFold2, is
              accessed in JupyterLab to predict the 3-dimensional
              structure of protein sequences. JupyterLab is accessible in
              2 ways---one as an interactive Galaxy tool and the other by
              running the underlying Docker container. In both ways,
              long-running training can be executed on Galaxy's compute
              infrastructure. Scripts to create the Docker container are
              available under MIT license at
              https://github.com/usegalaxy-eu/gpu-jupyterlab-docker.}
}