-
Notifications
You must be signed in to change notification settings - Fork 7
/
setup.py
executable file
·107 lines (90 loc) · 2.69 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import subprocess
import sys
from pathlib import Path
from datetime import datetime
from setuptools import setup, find_packages
def shell(*args):
    """Run a command and return its standard output as stripped text.

    The positional arguments form the argument vector (program first, then
    its arguments); the command is executed directly, not through a shell.

    Returns:
        The captured standard output, decoded as ASCII, with leading and
        trailing whitespace removed.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
        UnicodeDecodeError: If the output contains non-ASCII bytes.
    """
    completed = subprocess.run(args, stdout=subprocess.PIPE, check=True)
    return completed.stdout.decode("ascii").strip()
def write_version(version_core, pre_release=True):
    """Build the package version, persist it to ``vall_e/version.py``, return it.

    Args:
        version_core: Base version string, e.g. ``"0.0.1"``.
        pre_release: When true, the version becomes
            ``<version_core>-dev<YYYYmmddHHMMSS>``, where the timestamp is
            taken from the most recent git commit. When false, the version
            is ``version_core`` unchanged.

    Returns:
        The version string that was written to the version module.
    """
    version = version_core
    if pre_release:
        # Date of the latest commit in git's ISO-like form,
        # e.g. "2024-01-31 12:34:56 +0000".
        commit_date = shell("git", "log", "-1", "--format=%cd", "--date=iso")
        parsed = datetime.strptime(commit_date, "%Y-%m-%d %H:%M:%S %z")
        # No timezone conversion happens here: the stamp keeps the
        # wall-clock fields exactly as recorded in the commit.
        stamp = parsed.strftime("%Y%m%d%H%M%S")
        version = f"{version_core}-dev{stamp}"

    # The path is relative to the current working directory.
    Path("vall_e", "version.py").write_text(f'__version__ = "{version}"\n')
    return version
# Read the README explicitly as UTF-8. Without ``encoding`` Python falls back
# to the locale's preferred encoding (for example cp1252 on Windows), which
# can raise UnicodeDecodeError or garble a README containing non-ASCII text.
with open("README.md", "r", encoding="utf-8") as f:
    long_description = f.read()
# Platform-specific requirements, written as PEP 508 environment markers so
# that the choice is stored in the package metadata and evaluated on the
# machine that installs the package, rather than being fixed by whichever
# platform happened to run this script (e.g. when building a wheel).
# ``sys_platform`` is the marker name for ``sys.platform``, whose value on
# Windows is "win32".
platform_dependencies = [
    'psutil; sys_platform == "win32"',
    'deepspeed>=0.7.7; sys_platform != "win32"',
]
# Package metadata and dependency declarations handed to setuptools.
setup(
    # -- identity ----------------------------------------------------------
    name="vall-e",
    # Also writes vall_e/version.py as a side effect.
    version=write_version("0.0.1"),
    url="https://git.ecker.tech/mrq/vall-e",
    author="ecker",
    author_email="[email protected]",
    # -- descriptions ------------------------------------------------------
    description="An unofficial implementation of the audio LM VALL-E",
    long_description=long_description,
    long_description_content_type="text/markdown",
    # -- packaging ---------------------------------------------------------
    python_requires=">=3.10.0",
    packages=find_packages(),
    # -- mandatory dependencies --------------------------------------------
    install_requires=[
        # platform-specific requirements come first
        *platform_dependencies,
        # logging conveniences
        "coloredlogs>=15.0.1",  # barely needed
        "humanize>=4.4.0",  # not strictly needed
        "matplotlib>=3.6.0",  # needed only for plotting
        "pandas>=1.5.0",  # not strictly needed
        # boilerplate helpers ("diskcache>=5.4.0" is currently disabled)
        "einops>=0.6.0",  # could be replaced
        "tqdm",
        # Hugging Face stack
        "tokenizers",
        "transformers",
        "safetensors",
        # training-only extras
        "auraloss[all]",  # the [all] extra is needed for MelSTFTLoss
        "h5py",
        "prodigyopt @ git+https://github.com/konstmish/prodigy",
        # numerical core
        "numpy",
        "torch>=1.13.0",
        "torchaudio>=0.13.0",
        "torchmetrics",
        # core building blocks
        "phonemizer>=2.1.0",
        "encodec>=0.1.1",
        "vocos",
        # web UI
        "gradio",
        "nltk",  # splits text inputs into pieces
        "langdetect",  # detects the language of a text
    ],
    # -- optional dependencies ---------------------------------------------
    extras_require={
        "all": [
            # retnet backend (two internal copies also exist)
            "torchscale @ git+https://git.ecker.tech/mrq/torchscale",
            # bitnet
            "bitnet",
            # mamba
            "causal-conv1d",
            "mamba-ssm",
            # evaluation helpers
            "torcheval",
            # attention helpers
            "xformers",
            "sageattention==1.0.6",
            # "flash-attn" (installed with --no-build-isolation) stays
            # commented out for now: the author wants to query for it
            # instead, since Volta GPUs cannot use it.
            # alternative audio backend that has not proven fruitful
            "descript-audio-codec",
        ],
    },
)