Refactor the download system to accommodate multiple downloader types. Moved HTTP downloads into their respective implementation

This commit is contained in:
Stefano Fontana 2024-12-14 16:27:14 +01:00
parent a3ad0a0bba
commit ec3bc7dd7e
8 changed files with 261 additions and 113 deletions

View File

@ -6,7 +6,7 @@ use std::time::{Duration, Instant};
use codespan_reporting::term;
use codespan_reporting::term::termcolor::WriteColor;
use typst::utils::format_duration;
use typst_kit::download::{DownloadState, Downloader, Progress};
use typst_kit::package_downloads::{DownloadState, Downloader, Progress};
use crate::terminal::{self, TermOut};
use crate::ARGS;
@ -43,11 +43,7 @@ impl<T: Display> Progress for PrintDownload<T> {
/// Returns a new downloader.
pub fn downloader() -> Downloader {
let user_agent = concat!("typst/", env!("CARGO_PKG_VERSION"));
match ARGS.cert.clone() {
Some(cert) => Downloader::with_path(user_agent, cert),
None => Downloader::new(user_agent),
}
Downloader::new(ARGS.cert.clone())
}
/// Compile and format several download statistics and make an attempt at

View File

@ -38,7 +38,9 @@ default = ["fonts", "packages"]
fonts = ["dep:fontdb", "fontdb/memmap", "fontdb/fontconfig"]
# Add generic downloading utilities
downloads = ["dep:env_proxy", "dep:native-tls", "dep:ureq", "dep:openssl"]
downloads = ["downloads_http", "downloads_git"]
downloads_http = ["dep:env_proxy", "dep:native-tls", "dep:ureq", "dep:openssl"]
downloads_git = []
# Add package downloading utilities, implies `downloads`
packages = ["downloads", "dep:dirs", "dep:flate2", "dep:tar"]

View File

@ -20,7 +20,7 @@
//! `downloads` feature flag.
#[cfg(feature = "downloads")]
pub mod download;
pub mod package_downloads;
#[cfg(feature = "fonts")]
pub mod fonts;
#[cfg(feature = "packages")]

View File

@ -1,22 +1,13 @@
//! Download and unpack packages and package indices.
use std::fs;
use std::path::{Path, PathBuf};
use ecow::eco_format;
use once_cell::sync::OnceCell;
use typst_library::diag::{bail, PackageError, PackageResult, StrResult};
use typst_library::diag::{PackageError, PackageResult, StrResult};
use typst_syntax::package::{
PackageInfo, PackageSpec, PackageVersion, VersionlessPackageSpec,
};
use crate::download::{Downloader, Progress};
/// The default Typst registry.
pub const DEFAULT_REGISTRY: &str = "https://packages.typst.org";
/// The public namespace in the default Typst registry.
pub const DEFAULT_NAMESPACE: &str = "preview";
use crate::package_downloads::{Downloader, PackageDownloader, Progress};
/// The default packages sub directory within the package and package cache paths.
pub const DEFAULT_PACKAGES_SUBDIR: &str = "typst/packages";
@ -74,25 +65,27 @@ impl PackageStorage {
) -> PackageResult<PathBuf> {
let subdir = format!("{}/{}/{}", spec.namespace, spec.name, spec.version);
// check the package_path for the package directory.
if let Some(packages_dir) = &self.package_path {
let dir = packages_dir.join(&subdir);
if dir.exists() {
// no need to download, already in the path.
return Ok(dir);
}
}
// package was not in the package_path. check if it has been cached
if let Some(cache_dir) = &self.package_cache_path {
let dir = cache_dir.join(&subdir);
if dir.exists() {
//package was cached, so return the cached directory
return Ok(dir);
}
// Download from network if it doesn't exist yet.
if spec.namespace == DEFAULT_NAMESPACE {
self.download_package(spec, &dir, progress)?;
if dir.exists() {
return Ok(dir);
}
self.download_package(spec, &dir, progress)?;
if dir.exists() {
return Ok(dir);
}
}
@ -104,47 +97,36 @@ impl PackageStorage {
&self,
spec: &VersionlessPackageSpec,
) -> StrResult<PackageVersion> {
if spec.namespace == DEFAULT_NAMESPACE {
// For `DEFAULT_NAMESPACE`, download the package index and find the latest
// version.
self.download_index()?
.iter()
.filter(|package| package.name == spec.name)
.map(|package| package.version)
.max()
.ok_or_else(|| eco_format!("failed to find package {spec}"))
} else {
// For other namespaces, search locally. We only search in the data
// directory and not the cache directory, because the latter is not
// intended for storage of local packages.
let subdir = format!("{}/{}", spec.namespace, spec.name);
self.package_path
.iter()
.flat_map(|dir| std::fs::read_dir(dir.join(&subdir)).ok())
.flatten()
.filter_map(|entry| entry.ok())
.map(|entry| entry.path())
.filter_map(|path| path.file_name()?.to_string_lossy().parse().ok())
.max()
.ok_or_else(|| eco_format!("please specify the desired version"))
// Same logical flow as for a package download: check the package path first, then check online.
// Only the data directory is searched locally, not the cache directory, because the latter
// is not intended for storage of local packages.
let subdir = format!("{}/{}", spec.namespace, spec.name);
let res = self.package_path
.iter()
.flat_map(|dir| std::fs::read_dir(dir.join(&subdir)).ok())
.flatten()
.filter_map(|entry| entry.ok())
.map(|entry| entry.path())
.filter_map(|path| path.file_name()?.to_string_lossy().parse().ok())
.max();
if let Some(version) = res {
return Ok(version);
}
self.download_index(spec)?
.iter()
.filter(|package| package.name == spec.name)
.map(|package| package.version)
.max()
.ok_or_else(|| eco_format!("failed to find package {spec}"))
}
/// Download the package index. The result of this is cached for efficiency.
pub fn download_index(&self) -> StrResult<&[PackageInfo]> {
pub fn download_index(&self, spec: &VersionlessPackageSpec) -> StrResult<&[PackageInfo]> {
self.index
.get_or_try_init(|| {
let url = format!("{DEFAULT_REGISTRY}/{DEFAULT_NAMESPACE}/index.json");
match self.downloader.download(&url) {
Ok(response) => response.into_json().map_err(|err| {
eco_format!("failed to parse package index: {err}")
}),
Err(ureq::Error::Status(404, _)) => {
bail!("failed to fetch package index (not found)")
}
Err(err) => bail!("failed to fetch package index ({err})"),
}
})
.get_or_try_init(|| self.downloader.download_index(spec))
.map(AsRef::as_ref)
}
@ -158,31 +140,15 @@ impl PackageStorage {
package_dir: &Path,
progress: &mut dyn Progress,
) -> PackageResult<()> {
assert_eq!(spec.namespace, DEFAULT_NAMESPACE);
let url = format!(
"{DEFAULT_REGISTRY}/{DEFAULT_NAMESPACE}/{}-{}.tar.gz",
spec.name, spec.version
);
let data = match self.downloader.download_with_progress(&url, progress) {
Ok(data) => data,
Err(ureq::Error::Status(404, _)) => {
match self.downloader.download(spec, package_dir, progress) {
Err(PackageError::NotFound(spec)) => {
if let Ok(version) = self.determine_latest_version(&spec.versionless()) {
return Err(PackageError::VersionNotFound(spec.clone(), version));
Err(PackageError::VersionNotFound(spec.clone(), version))
} else {
return Err(PackageError::NotFound(spec.clone()));
Err(PackageError::NotFound(spec.clone()))
}
}
Err(err) => {
return Err(PackageError::NetworkFailed(Some(eco_format!("{err}"))))
}
};
let decompressed = flate2::read::GzDecoder::new(data.as_slice());
tar::Archive::new(decompressed).unpack(package_dir).map_err(|err| {
fs::remove_dir_all(package_dir).ok();
PackageError::MalformedArchive(Some(eco_format!("{err}")))
})
},
val => val
}
}
}

View File

@ -7,27 +7,23 @@
use std::collections::VecDeque;
use std::fmt::Debug;
use std::fs;
use std::io::{self, ErrorKind, Read};
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};
use ecow::EcoString;
use ecow::{eco_format, EcoString};
use native_tls::{Certificate, TlsConnector};
use once_cell::sync::OnceCell;
use ureq::Response;
use typst_library::diag::{bail, PackageError, PackageResult};
use typst_syntax::package::{PackageInfo, PackageSpec, VersionlessPackageSpec};
use crate::package_downloads::{DownloadState, PackageDownloader, Progress, DEFAULT_NAMESPACE};
/// Manages progress reporting for downloads.
pub trait Progress {
/// Invoked when a download is started.
fn print_start(&mut self);
/// The default Typst registry.
pub const DEFAULT_REGISTRY: &str = "https://packages.typst.org";
/// Invoked repeatedly while a download is ongoing.
fn print_progress(&mut self, state: &DownloadState);
/// Invoked when a download is finished.
fn print_finish(&mut self, state: &DownloadState);
}
/// An implementation of [`Progress`] with no-op reporting, i.e., reporting
/// events are swallowed.
@ -39,28 +35,14 @@ impl Progress for ProgressSink {
fn print_finish(&mut self, _: &DownloadState) {}
}
/// The current state of an in progress or finished download.
#[derive(Debug)]
pub struct DownloadState {
/// The expected amount of bytes to download, `None` if the response header
/// was not set.
pub content_len: Option<usize>,
/// The total amount of downloaded bytes until now.
pub total_downloaded: usize,
/// A backlog of the amount of downloaded bytes each second.
pub bytes_per_second: VecDeque<usize>,
/// The download starting instant.
pub start_time: Instant,
}
/// A minimal https client for downloading various resources.
pub struct Downloader {
pub struct HttpDownloader {
user_agent: EcoString,
cert_path: Option<PathBuf>,
cert: OnceCell<Certificate>,
}
impl Downloader {
impl HttpDownloader {
/// Creates a new downloader with the given user agent and no certificate.
pub fn new(user_agent: impl Into<EcoString>) -> Self {
Self {
@ -146,9 +128,28 @@ impl Downloader {
let response = self.download(url)?;
Ok(RemoteReader::from_response(response, progress).download()?)
}
/// Splits a package namespace into `(registry_url, registry_namespace)`.
///
/// Two forms are accepted:
///
/// * the default namespace ([`DEFAULT_NAMESPACE`]), which maps to
///   [`DEFAULT_REGISTRY`];
/// * `<schema>:<registry>:<namespace>`, which maps to the registry URL
///   `<schema>://<registry>` and the namespace `<namespace>`.
///
/// # Errors
///
/// Returns a message naming the first component that is missing or empty.
fn parse_namespace(ns: &str) -> Result<(String, String), EcoString> {
    if ns == DEFAULT_NAMESPACE {
        return Ok((DEFAULT_REGISTRY.to_string(), DEFAULT_NAMESPACE.to_string()));
    }

    // At most three parts: everything after the second ':' belongs to the
    // namespace component.
    let mut parts = ns.splitn(3, ':');

    // `splitn` always yields a first item, so a missing schema can only show
    // up as an empty string. Empty components are rejected throughout because
    // they would otherwise be spliced into a malformed URL.
    let schema = parts
        .next()
        .filter(|part| !part.is_empty())
        .ok_or_else(|| eco_format!("expected schema in {}", ns))?;
    let registry = parts
        .next()
        .filter(|part| !part.is_empty())
        .ok_or_else(|| eco_format!("invalid package registry in namespace {}", ns))?;
    let namespace = parts
        .next()
        .filter(|part| !part.is_empty())
        .ok_or_else(|| eco_format!("invalid package namespace in {}", ns))?;

    Ok((format!("{}://{}", schema, registry), namespace.to_string()))
}
}
impl Debug for Downloader {
impl Debug for HttpDownloader {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Downloader")
.field("user_agent", &self.user_agent)
@ -257,3 +258,44 @@ impl<'p> RemoteReader<'p> {
Ok(data)
}
}
impl PackageDownloader for HttpDownloader {
fn download_index(&self, spec: &VersionlessPackageSpec) -> Result<Vec<PackageInfo>, EcoString> {
let (registry, namespace) = Self::parse_namespace(spec.namespace.as_str())?;
let url = format!("{registry}/{namespace}/index.json");
match self.download(&url) {
Ok(response) => response.into_json().map_err(|err| {
eco_format!("failed to parse package index: {err}")
}),
Err(ureq::Error::Status(404, _)) => {
bail!("failed to fetch package index (not found)")
}
Err(err) => bail!("failed to fetch package index ({err})"),
}
}
fn download(&self, spec: &PackageSpec, package_dir: &Path, progress: &mut dyn Progress) -> PackageResult<()> {
let (registry, namespace) = Self::parse_namespace(spec.namespace.as_str()).map_err(|x| PackageError::Other(Some(x)))?;
let url = format!(
"{}/{}/{}-{}.tar.gz",
registry, namespace, spec.name, spec.version
);
let data = match self.download_with_progress(&url, progress) {
Ok(data) => data,
Err(ureq::Error::Status(404, _)) => {
Err(PackageError::NotFound(spec.clone()))?
}
Err(err) => {
Err(PackageError::NetworkFailed(Some(eco_format!("{err}"))))?
}
};
let decompressed = flate2::read::GzDecoder::new(data.as_slice());
tar::Archive::new(decompressed).unpack(package_dir).map_err(|err| {
fs::remove_dir_all(package_dir).ok();
PackageError::MalformedArchive(Some(eco_format!("{err}")))
})
}
}

View File

@ -0,0 +1,121 @@
use std::collections::VecDeque;
use std::fmt::Debug;
use std::path::{Path, PathBuf};
use std::time::Instant;
use ecow::{eco_format, EcoString};
use typst_library::diag::{PackageError, PackageResult};
use typst_syntax::package::{PackageInfo, PackageSpec, VersionlessPackageSpec};
/// The public namespace in the default Typst registry.
pub const DEFAULT_NAMESPACE: &str = "preview";
#[cfg(feature = "downloads_http")]
mod http;
#[cfg(feature = "downloads_git")]
mod git;
/// A backend that can fetch package indices and package contents.
///
/// Implementors must also be [`Debug`], [`Sync`] and [`Send`].
pub trait PackageDownloader : Debug + Sync + Send {
/// Downloads the package index for the namespace of `spec`.
///
/// Returns the index entries on success, or an error message on failure.
fn download_index(&self, spec: &VersionlessPackageSpec) -> Result<Vec<PackageInfo>, EcoString>;
/// Downloads the package described by `spec`, reporting through `progress`.
///
/// `package_dir` is the destination directory for the package contents.
// NOTE(review): presumably implementors must leave `package_dir` absent or
// empty on failure - confirm against the callers.
fn download(&self, spec: &PackageSpec, package_dir: &Path, progress: &mut dyn Progress) -> PackageResult<()>;
}
/// The current state of an in progress or finished download.
///
/// A reference to this state is handed to [`Progress::print_progress`] and
/// [`Progress::print_finish`].
#[derive(Debug)]
pub struct DownloadState {
/// The expected amount of bytes to download, `None` if the response header
/// was not set.
pub content_len: Option<usize>,
/// The total amount of downloaded bytes until now.
pub total_downloaded: usize,
/// A backlog of the amount of downloaded bytes each second.
pub bytes_per_second: VecDeque<usize>,
/// The download starting instant.
pub start_time: Instant,
}
/// Manages progress reporting for downloads.
///
/// The progress and finish callbacks receive the current [`DownloadState`].
pub trait Progress {
/// Invoked when a download is started.
fn print_start(&mut self);
/// Invoked repeatedly while a download is ongoing.
fn print_progress(&mut self, state: &DownloadState);
/// Invoked when a download is finished.
fn print_finish(&mut self, state: &DownloadState);
}
/// A package downloader that holds one optional [`PackageDownloader`] per
/// supported backend.
#[derive(Debug)]
pub struct Downloader{
/// Backend for HTTP(S) downloads, `None` when it is not available.
http_downloader: Option<Box<dyn PackageDownloader>>,
/// Backend for git downloads, `None` when it is not available.
git_downloader: Option<Box<dyn PackageDownloader>>,
}
impl Downloader {
    /// Creates a downloader, building every backend that is available.
    ///
    /// `cert` is an optional path to a certificate file; it is handed to each
    /// backend constructor.
    pub fn new(cert: Option<PathBuf>) -> Self {
        Self {
            http_downloader: Self::make_http_downloader(cert.clone()),
            git_downloader: Self::make_git_downloader(cert),
        }
    }

    /// Builds the HTTP(S) backend.
    ///
    /// Returns `None` when the `downloads_http` feature is disabled.
    fn make_http_downloader(cert: Option<PathBuf>) -> Option<Box<dyn PackageDownloader>> {
        #[cfg(not(feature = "downloads_http"))]
        {
            // Nothing consumes the certificate without the HTTP backend.
            let _ = cert;
            None
        }
        #[cfg(feature = "downloads_http")]
        {
            let user_agent = concat!("typst/", env!("CARGO_PKG_VERSION"));
            match cert {
                Some(cert_path) => {
                    Some(Box::new(http::HttpDownloader::with_path(user_agent, cert_path)))
                }
                None => Some(Box::new(http::HttpDownloader::new(user_agent))),
            }
        }
    }

    /// Builds the git backend.
    ///
    /// No git backend is constructed yet, so this always returns `None`,
    /// whatever the state of the `downloads_git` feature.
    fn make_git_downloader(_cert: Option<PathBuf>) -> Option<Box<dyn PackageDownloader>> {
        None
    }

    /// Selects the backend responsible for the namespace `ns`.
    ///
    /// The backend is chosen from the part of `ns` in front of its first `:`,
    /// or from the whole namespace when it contains no `:`. The default
    /// namespace is served by the HTTP(S) backend.
    ///
    /// # Errors
    ///
    /// Returns [`PackageError::Other`] when the selected backend was not
    /// constructed, or when no backend handles the downloader type.
    fn get_downloader(&self, ns: &str) -> Result<&Box<dyn PackageDownloader>, PackageError> {
        /// Error for a backend that is known but was never constructed.
        fn uninitialized(kind: &str) -> PackageError {
            PackageError::Other(Some(eco_format!(
                "{kind} downloader has not been initialized correctly"
            )))
        }

        // Splitting always yields a leading part, so there is no "missing
        // downloader type" case to handle.
        let download_type = ns.split_once(':').map_or(ns, |(head, _)| head);

        match download_type {
            #[cfg(feature = "downloads_http")]
            "http" => self.http_downloader.as_ref().ok_or_else(|| uninitialized("Http")),
            #[cfg(feature = "downloads_http")]
            "https" | DEFAULT_NAMESPACE => {
                self.http_downloader.as_ref().ok_or_else(|| uninitialized("Https"))
            }
            #[cfg(feature = "downloads_git")]
            "git" => self.git_downloader.as_ref().ok_or_else(|| uninitialized("Git")),
            other => Err(PackageError::Other(Some(eco_format!(
                "Unknown downloader type: {}",
                other
            )))),
        }
    }
}
impl PackageDownloader for Downloader {
    /// Downloads the package index through the backend selected by the
    /// namespace of `spec`.
    fn download_index(
        &self,
        spec: &VersionlessPackageSpec,
    ) -> Result<Vec<PackageInfo>, EcoString> {
        self.get_downloader(spec.namespace.as_str())?.download_index(spec)
    }

    /// Downloads the package `spec` into `package_dir` through the backend
    /// selected by the namespace of `spec`, reporting through `progress`.
    fn download(
        &self,
        spec: &PackageSpec,
        package_dir: &Path,
        progress: &mut dyn Progress,
    ) -> PackageResult<()> {
        self.get_downloader(spec.namespace.as_str())?
            .download(spec, package_dir, progress)
    }
}

View File

@ -263,15 +263,36 @@ impl Display for VersionlessPackageSpec {
}
}
/// Checks whether `namespace` may appear in a package specification.
///
/// A namespace is accepted when it is a plain identifier (a standard
/// namespace), or when the part in front of its first `:` (the package remote
/// type) is an identifier. Whatever follows the `:` is not inspected here;
/// parsing it is left to the downloader implementation.
fn is_namespace_valid(namespace: &str) -> bool {
    // Non-standard namespaces have the form
    // `<package_remote_type>:<package_path>`.
    is_ident(namespace)
        || namespace.split(':').next().is_some_and(|remote_type| is_ident(remote_type))
}
fn parse_namespace<'s>(s: &mut Scanner<'s>) -> Result<&'s str, EcoString> {
if !s.eat_if('@') {
Err("package specification must start with '@'")?;
}
//todo: allow for multiple slashes in the namespace by eating until the last slash
let namespace = s.eat_until('/');
if namespace.is_empty() {
Err("package specification is missing namespace")?;
} else if !is_ident(namespace) {
}
if !is_namespace_valid(namespace) {
Err(eco_format!("`{namespace}` is not a valid package namespace"))?;
}