/*
** Copyright (C) 2002-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU Lesser General Public License as published by
** the Free Software Foundation; either version 2.1 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU Lesser General Public License for more details.
**
** You should have received a copy of the GNU Lesser General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
30 /*------------------------------------------------------------------------------
31 ** Macros to handle big/little endian issues.
34 #define SFE_HTK_BAD_FILE_LEN 1666
35 #define SFE_HTK_NOT_WAVEFORM 1667
37 /*------------------------------------------------------------------------------
38 ** Private static functions.
41 static int htk_close (SF_PRIVATE
*psf
) ;
43 static int htk_write_header (SF_PRIVATE
*psf
, int calc_length
) ;
44 static int htk_read_header (SF_PRIVATE
*psf
) ;
46 /*------------------------------------------------------------------------------
51 htk_open (SF_PRIVATE
*psf
)
56 return SFE_HTK_NO_PIPE
;
58 if (psf
->file
.mode
== SFM_READ
|| (psf
->file
.mode
== SFM_RDWR
&& psf
->filelength
> 0))
59 { if ((error
= htk_read_header (psf
)))
63 subformat
= SF_CODEC (psf
->sf
.format
) ;
65 if (psf
->file
.mode
== SFM_WRITE
|| psf
->file
.mode
== SFM_RDWR
)
66 { if ((SF_CONTAINER (psf
->sf
.format
)) != SF_FORMAT_HTK
)
67 return SFE_BAD_OPEN_FORMAT
;
69 psf
->endian
= SF_ENDIAN_BIG
;
71 if (htk_write_header (psf
, SF_FALSE
))
74 psf
->write_header
= htk_write_header
;
77 psf
->container_close
= htk_close
;
79 psf
->blockwidth
= psf
->bytewidth
* psf
->sf
.channels
;
82 { case SF_FORMAT_PCM_16
: /* 16-bit linear PCM. */
83 error
= pcm_init (psf
) ;
92 /*------------------------------------------------------------------------------
96 htk_close (SF_PRIVATE
*psf
)
98 if (psf
->file
.mode
== SFM_WRITE
|| psf
->file
.mode
== SFM_RDWR
)
99 htk_write_header (psf
, SF_TRUE
) ;
105 htk_write_header (SF_PRIVATE
*psf
, int calc_length
)
106 { sf_count_t current
;
107 int sample_count
, sample_period
;
109 current
= psf_ftell (psf
) ;
112 psf
->filelength
= psf_get_filelen (psf
) ;
114 /* Reset the current header length to zero. */
115 psf
->header
[0] = 0 ;
117 psf_fseek (psf
, 0, SEEK_SET
) ;
119 if (psf
->filelength
> 12)
120 sample_count
= (psf
->filelength
- 12) / 2 ;
124 sample_period
= 10000000 / psf
->sf
.samplerate
;
126 psf_binheader_writef (psf
, "E444", sample_count
, sample_period
, 0x20000) ;
128 /* Header construction complete so write it out. */
129 psf_fwrite (psf
->header
, psf
->headindex
, 1, psf
) ;
134 psf
->dataoffset
= psf
->headindex
;
137 psf_fseek (psf
, current
, SEEK_SET
) ;
140 } /* htk_write_header */
/*
** Found the following info in a comment block within Bill Schottstaedt's
** sndlib library.
**
** HTK format files consist of a contiguous sequence of samples preceded by a
** header. Each sample is a vector of either 2-byte integers or 4-byte floats.
** 2-byte integers are used for compressed forms as described below and for
** vector quantised data as described later in section 5.11. HTK format data
** files can also be used to store speech waveforms as described in section 5.8.
**
** The HTK file format header is 12 bytes long and contains the following data
**   nSamples   -- number of samples in file (4-byte integer)
**   sampPeriod -- sample period in 100ns units (4-byte integer)
**   sampSize   -- number of bytes per sample (2-byte integer)
**   parmKind   -- a code indicating the sample kind (2-byte integer)
**
** The parameter kind consists of a 6 bit code representing the basic
** parameter kind plus additional bits for each of the possible qualifiers.
** The basic parameter kind codes are
**
**    0   WAVEFORM    sampled waveform
**    1   LPC         linear prediction filter coefficients
**    2   LPREFC      linear prediction reflection coefficients
**    3   LPCEPSTRA   LPC cepstral coefficients
**    4   LPDELCEP    LPC cepstra plus delta coefficients
**    5   IREFC       LPC reflection coef in 16 bit integer format
**    6   MFCC        mel-frequency cepstral coefficients
**    7   FBANK       log mel-filter bank channel outputs
**    8   MELSPEC     linear mel-filter bank channel outputs
**    9   USER        user defined sample kind
**    10  DISCRETE    vector quantised data
**
** and the bit-encoding for the qualifiers (in octal) is
**   _E   000100   has energy
**   _N   000200   absolute energy suppressed
**   _D   000400   has delta coefficients
**   _A   001000   has acceleration coefficients
**   _C   002000   is compressed
**   _Z   004000   has zero mean static coef.
**   _K   010000   has CRC checksum
**   _O   020000   has 0'th cepstral coef.
*/
186 htk_read_header (SF_PRIVATE
*psf
)
187 { int sample_count
, sample_period
, marker
;
189 psf_binheader_readf (psf
, "pE444", 0, &sample_count
, &sample_period
, &marker
) ;
191 if (2 * sample_count
+ 12 != psf
->filelength
)
192 return SFE_HTK_BAD_FILE_LEN
;
194 if (marker
!= 0x20000)
195 return SFE_HTK_NOT_WAVEFORM
;
197 psf
->sf
.channels
= 1 ;
199 if (sample_period
> 0)
200 { psf
->sf
.samplerate
= 10000000 / sample_period
;
201 psf_log_printf (psf
, "HTK Waveform file\n Sample Count : %d\n Sample Period : %d => %d Hz\n",
202 sample_count
, sample_period
, psf
->sf
.samplerate
) ;
205 { psf
->sf
.samplerate
= 16000 ;
206 psf_log_printf (psf
, "HTK Waveform file\n Sample Count : %d\n Sample Period : %d (should be > 0) => Guessed sample rate %d Hz\n",
207 sample_count
, sample_period
, psf
->sf
.samplerate
) ;
210 psf
->sf
.format
= SF_FORMAT_HTK
| SF_FORMAT_PCM_16
;
213 /* HTK always has a 12 byte header. */
214 psf
->dataoffset
= 12 ;
215 psf
->endian
= SF_ENDIAN_BIG
;
217 psf
->datalength
= psf
->filelength
- psf
->dataoffset
;
219 psf
->blockwidth
= psf
->sf
.channels
* psf
->bytewidth
;
221 if (! psf
->sf
.frames
&& psf
->blockwidth
)
222 psf
->sf
.frames
= (psf
->filelength
- psf
->dataoffset
) / psf
->blockwidth
;
225 } /* htk_read_header */