-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Description
I see this is quite an old repo now, but it helped me get to something that worked where ChatGPT failed 😅 Thanks for posting it!
Here's a version in Python:
import numpy as np
from PIL import Image
import soundfile as sf
def img2spect(img_data, minfreq, maxfreq, dur, Fs, invert):
    """
    Convert an image to audio such that the image will appear on a spectrogram of the audio.

    Each image row drives one sine oscillator. Rows are mapped bottom-to-top onto
    logarithmically spaced frequencies starting at `minfreq`, and the pixel values
    along a row (stretched over `dur` seconds) become that oscillator's amplitude
    envelope. The oscillators are summed and the result is peak-normalized.

    Parameters:
    - img_data: Image data as a 2-D numpy array (rows x columns) with values in 0..255
    - minfreq: Minimum frequency represented in the spectrogram
    - maxfreq: Maximum frequency represented in the spectrogram
    - dur: Duration of the output audio in seconds
    - Fs: Sample rate of the output audio
    - invert: If true, invert the image's colors

    Returns:
    - audio: A numpy array of int(dur * Fs) samples encoding the image, scaled so
      that the largest absolute sample is 1. If the mix is silent (e.g. an
      all-black image) or has zero length, it is returned as zeros rather than
      being divided by a zero peak.
    """
    if invert:
        img_data = 255 - img_data

    # Flip vertically so the bottom image row comes first (lowest frequency),
    # and scale pixel values to the 0..1 amplitude range.
    imd = np.flipud(img_data) / 255.0

    n_rows = img_data.shape[0]
    n_samples = int(dur * Fs)
    audio = np.zeros(n_samples)

    # Time axes: one point per image column vs. one point per audio sample.
    oldlin = np.linspace(0, dur, img_data.shape[1])
    newlin = np.linspace(0, dur, n_samples)

    # Private generator seeded with 1: reproducible output (same sequence as
    # seeding the global generator with 1) without reseeding the caller's
    # global NumPy random state.
    rng = np.random.RandomState(1)

    for row in range(n_rows):
        # Stretch this row's pixel values into a per-sample amplitude envelope.
        amplitudes = np.interp(newlin, oldlin, imd[row, :], left=0, right=0)
        # Logarithmic spacing between minfreq (row 0) and maxfreq (exclusive).
        freq = minfreq * (maxfreq / minfreq) ** (row / n_rows)
        # A random time shift per row decorrelates the oscillators' phases.
        phase_offset = rng.rand()
        signal = np.sin(2 * np.pi * freq * (newlin + phase_offset))
        audio += amplitudes * signal

    # Peak-normalize, but never divide by zero: a silent or empty mix stays zeros.
    peak = np.max(np.abs(audio)) if audio.size else 0.0
    if peak > 0:
        audio = audio / peak
    return audio
# Example usage:
# Example script: load an image as grayscale, encode it as audio with
# img2spect, and write the result to a WAV file.
image_path = 'path_to_input_image.png'
output_audio_path = 'path_to_output_file.wav'
min_freq = 500  # in Hz
max_freq = 10000  # in Hz
duration = 5  # in seconds
sample_rate = 44100  # in Hz
invert_colors = False

# Load the image as single-channel grayscale; the context manager closes the
# underlying file as soon as the pixel data has been copied into the array.
with Image.open(image_path) as img:
    img_data = np.array(img.convert('L'))

# Generate the audio signal
audio_signal = img2spect(img_data, min_freq, max_freq, duration, sample_rate, invert_colors)

# Save the audio signal to a file
sf.write(output_audio_path, audio_signal, sample_rate)
# NOTE(review): the original last line had page text fused onto the call above:
# "scholtes and lewiswatson55" -- presumably not part of the code.
Metadata
Metadata
Assignees
Labels
No labels