AquaDX/tools/data_convert.py
2024-02-07 22:22:58 -05:00

74 lines
2.2 KiB
Python

import argparse
import shutil
from pathlib import Path
import orjson
import xmltodict
from hypy_utils import write
from hypy_utils.tqdm_utils import pmap
def convert_one(file: Path):
    """
    Convert one XML file from the source tree into a JSON file in the destination tree.

    Relies on the module-level ``src`` and ``dst`` paths, which are assigned in the
    ``__main__`` section of this script before this function is called.

    :param file: Path of an XML file located under ``src``
    :return: None. Files whose path relative to ``src`` has two or fewer segments are skipped.
    :raises AssertionError: If the parsed document does not contain exactly one root element
    """
    # Get path relative to source
    rel = file.relative_to(src)

    # If path is one-level under StreamingAssets, ignore it (e.g. StreamingAssets/A000/Data.xml)
    if len(rel.parts) <= 2:
        return

    # Feed raw bytes to the parser so the XML's own BOM / encoding declaration decides the
    # decoding, instead of the platform's locale encoding that read_text() would apply
    xml = xmltodict.parse(file.read_bytes())

    # There should only be one root element, expand it
    assert len(xml) == 1, f'Expected 1 root element, got {len(xml)}'
    xml = next(iter(xml.values()))

    # Remove @xmlns:xsi and @xmlns:xsd (an empty root element parses to None, so guard the type)
    if isinstance(xml, dict):
        xml.pop('@xmlns:xsi', None)
        xml.pop('@xmlns:xsd', None)

    # Generate target file path
    # Ignore the first segment of the relative path, and append to the destination
    # Also collapse the single-item directory into the filename
    # e.g. {src}/A000/music/music000001/Music.xml -> {dst}/music/000001.json
    # NOTE(review): a file exactly two directories deep (e.g. A000/music/X.xml) gets an empty
    # id because 'music' has no digits, and is written as {dst}/.json - confirm this cannot occur
    file_id = ''.join(filter(str.isdigit, rel.parts[-2]))
    target = dst.joinpath(*rel.parts[1:-2]) / f'{file_id}.json'

    # Create directories if they don't exist
    target.parent.mkdir(parents=True, exist_ok=True)

    # Write json
    write(target, orjson.dumps(xml))
if __name__ == '__main__':
    # Command-line entry point: convert every XML file under <source> to JSON under <destination>
    agupa = argparse.ArgumentParser()
    agupa.add_argument('source', type=str, help='Package/Sinmai_Data/StreamingAssets directory')
    agupa.add_argument('destination', type=str, help='Directory to extract to')
    args = agupa.parse_args()

    # convert_one reads these two names as module-level globals, so they must be assigned here
    src = Path(args.source)
    dst = Path(args.destination)

    # Check that A000 exists in the source directory.
    # Done explicitly instead of with assert, because asserts are stripped under `python -O`
    if not (src / 'A000').exists():
        agupa.error(f'{src}/A000 does not exist')

    # Offer to delete a destination that already exists.
    # If the user declines, conversion still proceeds and writes into the existing directory
    if dst.exists():
        if input(f'{dst} already exists, delete? (y/n): ').strip().lower() == 'y':
            print(f'Deleting {dst}')
            shutil.rmtree(dst)

    # Find all xml files in the source directory
    files = list(src.rglob('*.xml'))
    print(f'Found {len(files)} xml files')

    # Parallel map with a progress bar
    # NOTE(review): presumably pmap runs convert_one in worker processes; if they are started
    # with the "spawn" method, src/dst assigned under this guard will not exist there - confirm
    pmap(convert_one, files, desc='Converting', unit='file', chunksize=50)