/***************************************************************************** Perceptual Evaluation of Speech Quality (PESQ) ITU-T Draft Recommendation P.862. Version 1.1 - 15 November 2000. NOTICE The Perceptual Evaluation of Speech Quality (PESQ) algorithm and the copyright therein is the property of British Telecommunications plc and Royal KPN NV, and is protected by UK, US and other patents. Permission is granted to use PESQ for the purpose of evaluation of ITU-T recommendation P.862. Any other use of this software or the PESQ algorithm requires a license, which may be obtained from: OPTICOM GmbH Michael Keyhl, Am Weichselgarten 7, D- 91058 Erlangen, Germany Phone: +49 9131 691 160 Fax: +49 9131 691 325 E-mail: info@opticom.de PsyTechnics Limited Richard Reynolds, B54 Adastral Park, Ipswich IP5 3RE, UK Phone: +44 1473 644 730 or +44 7730 426 251 Fax: +44 1473 645 663 E-mail: richard.reynolds@psytechnics.com Patent-only licences should be obtained from Opticom. PsyTechnics or Opticom can provide licences, and further information, for other PESQ products. Further information is also available from: www.pesq.org By using this software you acknowledge that PESQ is protected by copyright and by patents and is being made available to you for the purpose of evaluation of ITU-T Recommendation P.862. You must not use PESQ for any other purpose without first obtaining a written license from British Telecommunications plc and Royal KPN NV, from their agents listed above. You must not disclose, reproduce or otherwise release PESQ to any third party without the prior written permission of British Telecommunications plc and Royal KPN NV. Authors: Antony Rix (BT) Mike Hollier (BT) Andries Hekstra (KPN Research) John Beerends (KPN Research) *****************************************************************************/ #include #include #include "pesq.h" #include "dsp.h" #define ITU_RESULTS_FILE "_pesq_itu_results.txt" #define SIMPLE_RESULTS_FILE "_pesq_results.txt" int main (int argc, const char *argv []); void usage (void); void pesq_measure (SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info, ERROR_INFO * err_info, long * Error_Flag, char ** Error_Type); void usage (void) { printf ("Usage:\n"); printf (" PESQ HELP Displays this text\n"); printf (" PESQ [options] ref deg [smos] [cond]\n"); printf (" Run model on reference ref and degraded deg\n"); printf ("\n"); printf ("Options: +8000 +16000 +swap\n"); printf (" Sample rate - No default. Must select either +8000 or +16000.\n"); printf (" Swap byte order - machine native format by default. Select +swap for byteswap.\n"); printf ("\n"); printf (" [smos] is an optional number copied to %s\n", ITU_RESULTS_FILE); printf (" [cond] is an optional condition number copied to %s\n", ITU_RESULTS_FILE); printf (" smos must always precede cond. However, both may be omitted."); printf ("\n"); printf ("File names, smos, cond may not begin with a + character.\n"); printf ("\n"); printf ("Files with names ending .wav or .WAV are assumed to have a 44-byte header, which"); printf (" is automatically skipped. All other file types are assumed to have no header.\n"); } int main (int argc, const char *argv []) { int arg; int names = 0; long sample_rate = -1; SIGNAL_INFO ref_info; SIGNAL_INFO deg_info; ERROR_INFO err_info; long Error_Flag = 0; char * Error_Type = "Unknown error type."; if (Error_Flag == 0) { printf ("PESQ Perceptual Evaluation of Speech Quality. Version 1.1 - 15 November 2000.\n\n"); printf ("Copyright 2000 British Telecommunications plc. All rights reserved.\n"); printf ("Copyright 2000 Royal KPN NV. All rights reserved.\n\n"); printf ("The Perceptual Evaluation of Speech Quality (PESQ) algorithm and the copyright\n"); printf ("therein is the property of British Telecommunications plc and Royal KPN NV,\n"); printf ("and is protected by UK, US and other patents. Permission is granted to use\n"); printf ("PESQ for the purpose of evaluation in connection with the establishment of\n"); printf ("ITU-T recommendation P.862. Any other use of this software or the PESQ\n"); printf ("algorithm requires a license, which may be obtained from:\n"); printf ("\n"); printf ("OPTICOM GmbH\n"); printf ("Michael Keyhl, Am Weichselgarten 7, D- 91058 Erlangen, Germany\n"); printf ("Phone: +49 9131 691 160 Fax: +49 9131 691 325 E-mail: info@opticom.de\n"); printf ("\n"); printf ("British Telecommunications plc\n"); printf ("Richard Reynolds, PsyTechnics, B54 Adastral Park, Ipswich IP5 3RE, UK\n"); printf ("Phone: +44 1473 644 339 Fax: +44 1473 645 663\n"); printf ("E-mail: richard.reynolds@psytechnics.com\n\n"); if (argc < 3){ usage (); return 0; } else { strcpy (ref_info.path_name, ""); ref_info.apply_swap = 0; strcpy (deg_info.path_name, ""); deg_info.apply_swap = 0; err_info. subj_mos = 0; err_info. cond_nr = 0; for (arg = 1; arg < argc; arg++) { if (argv [arg] [0] == '+') { if (strcmp (argv [arg], "+swap") == 0) { ref_info.apply_swap = 1; deg_info.apply_swap = 1; } else { if (strcmp (argv [arg], "+16000") == 0) { sample_rate = 16000L; } else { if (strcmp (argv [arg], "+8000") == 0) { sample_rate = 8000L; } else { usage (); fprintf (stderr, "Invalid parameter '%s'.\n", argv [arg]); return 1; } } } } else { switch (names) { case 0: strcpy (ref_info.path_name, argv [arg]); break; case 1: strcpy (deg_info.path_name, argv [arg]); break; case 2: sscanf (argv [arg], "%f", &(err_info. subj_mos)); break; case 3: sscanf (argv [arg], "%d", &(err_info. cond_nr)); break; default: usage (); fprintf (stderr, "Invalid parameter '%s'.\n", argv [arg]); return 1; } names++; } } if (sample_rate == -1) { printf ("PESQ Error. Must specify either +8000 or +16000 sample frequency option!\n"); exit (1); } strcpy (ref_info. file_name, ref_info. path_name); if (strrchr (ref_info. file_name, '\\') != NULL) { strcpy (ref_info. file_name, 1 + strrchr (ref_info. file_name, '\\')); } if (strrchr (ref_info. file_name, '/') != NULL) { strcpy (ref_info. file_name, 1 + strrchr (ref_info. file_name, '/')); } strcpy (deg_info. file_name, deg_info. path_name); if (strrchr (deg_info. file_name, '\\') != NULL) { strcpy (deg_info. file_name, 1 + strrchr (deg_info. file_name, '\\')); } if (strrchr (deg_info. file_name, '/') != NULL) { strcpy (deg_info. file_name, 1 + strrchr (deg_info. file_name, '/')); } select_rate (sample_rate, &Error_Flag, &Error_Type); pesq_measure (&ref_info, °_info, &err_info, &Error_Flag, &Error_Type); } } if (Error_Flag == 0) { printf ("\nPrediction : PESQ_MOS = %.3f\n", (double) err_info.pesq_mos); return 0; } else { printf ("An error of type %d ", Error_Flag); if (Error_Type != NULL) { printf (" (%s) occurred during processing.\n", Error_Type); } else { printf ("occurred during processing.\n"); } return 0; } } double align_filter_dB [26] [2] = {{0.,-500}, {50., -500}, {100., -500}, {125., -500}, {160., -500}, {200., -500}, {250., -500}, {300., -500}, {350., 0}, {400., 0}, {500., 0}, {600., 0}, {630., 0}, {800., 0}, {1000., 0}, {1250., 0}, {1600., 0}, {2000., 0}, {2500., 0}, {3000., 0}, {3250., 0}, {3500., -500}, {4000., -500}, {5000., -500}, {6300., -500}, {8000., -500}}; double standard_IRS_filter_dB [26] [2] = {{ 0., -200}, { 50., -40}, {100., -20}, {125., -12}, {160., -6}, {200., 0}, {250., 4}, {300., 6}, {350., 8}, {400., 10}, {500., 11}, {600., 12}, {700., 12}, {800., 12}, {1000., 12}, {1300., 12}, {1600., 12}, {2000., 12}, {2500., 12}, {3000., 12}, {3250., 12}, {3500., 4}, {4000., -200}, {5000., -200}, {6300., -200}, {8000., -200}}; #define TARGET_AVG_POWER 1E7 void fix_power_level (SIGNAL_INFO *info, char *name, long maxNsamples) { long n = info-> Nsamples; long i; float *align_filtered = (float *) safe_malloc ((n + DATAPADDING_MSECS * (Fs / 1000)) * sizeof (float)); float global_scale; float power_above_300Hz; for (i = 0; i < n + DATAPADDING_MSECS * (Fs / 1000); i++) { align_filtered [i] = info-> data [i]; } apply_filter (align_filtered, info-> Nsamples, 26, align_filter_dB); power_above_300Hz = (float) pow_of (align_filtered, SEARCHBUFFER * Downsample, n - SEARCHBUFFER * Downsample + DATAPADDING_MSECS * (Fs / 1000), maxNsamples - 2 * SEARCHBUFFER * Downsample + DATAPADDING_MSECS * (Fs / 1000)); global_scale = (float) sqrt (TARGET_AVG_POWER / power_above_300Hz); for (i = 0; i < n; i++) { info-> data [i] *= global_scale; } safe_free (align_filtered); } void pesq_measure (SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info, ERROR_INFO * err_info, long * Error_Flag, char ** Error_Type) { float * ftmp = NULL; ref_info-> data = NULL; ref_info-> VAD = NULL; ref_info-> logVAD = NULL; deg_info-> data = NULL; deg_info-> VAD = NULL; deg_info-> logVAD = NULL; if ((*Error_Flag) == 0) { printf ("Reading reference file %s...", ref_info-> path_name); load_src (Error_Flag, Error_Type, ref_info); if ((*Error_Flag) == 0) printf ("done.\n"); } if ((*Error_Flag) == 0) { printf ("Reading degraded file %s...", deg_info-> path_name); load_src (Error_Flag, Error_Type, deg_info); if ((*Error_Flag) == 0) printf ("done.\n"); } if (((ref_info-> Nsamples - 2 * SEARCHBUFFER * Downsample < Fs / 4) || (deg_info-> Nsamples - 2 * SEARCHBUFFER * Downsample < Fs / 4)) && ((*Error_Flag) == 0)) { (*Error_Flag) = 2; (*Error_Type) = "Reference or Degraded below 1/4 second - processing stopped "; } if ((*Error_Flag) == 0) { alloc_other (ref_info, deg_info, Error_Flag, Error_Type, &ftmp); } if ((*Error_Flag) == 0) { int maxNsamples = max (ref_info-> Nsamples, deg_info-> Nsamples); float * model_ref; float * model_deg; long i; FILE *resultsFile; printf (" Level normalization...\n"); fix_power_level (ref_info, "reference", maxNsamples); fix_power_level (deg_info, "degraded", maxNsamples); printf (" IRS filtering...\n"); apply_filter (ref_info-> data, ref_info-> Nsamples, 26, standard_IRS_filter_dB); apply_filter (deg_info-> data, deg_info-> Nsamples, 26, standard_IRS_filter_dB); model_ref = (float *) safe_malloc ((ref_info-> Nsamples + DATAPADDING_MSECS * (Fs / 1000)) * sizeof (float)); model_deg = (float *) safe_malloc ((deg_info-> Nsamples + DATAPADDING_MSECS * (Fs / 1000)) * sizeof (float)); for (i = 0; i < ref_info-> Nsamples + DATAPADDING_MSECS * (Fs / 1000); i++) { model_ref [i] = ref_info-> data [i]; } for (i = 0; i < deg_info-> Nsamples + DATAPADDING_MSECS * (Fs / 1000); i++) { model_deg [i] = deg_info-> data [i]; } input_filter( ref_info, deg_info, ftmp ); printf (" Variable delay compensation...\n"); calc_VAD (ref_info); calc_VAD (deg_info); crude_align (ref_info, deg_info, err_info, WHOLE_SIGNAL, ftmp); utterance_locate (ref_info, deg_info, err_info, ftmp); for (i = 0; i < ref_info-> Nsamples + DATAPADDING_MSECS * (Fs / 1000); i++) { ref_info-> data [i] = model_ref [i]; } for (i = 0; i < deg_info-> Nsamples + DATAPADDING_MSECS * (Fs / 1000); i++) { deg_info-> data [i] = model_deg [i]; } safe_free (model_ref); safe_free (model_deg); if ((*Error_Flag) == 0) { if (ref_info-> Nsamples < deg_info-> Nsamples) { float *new_ref = (float *) safe_malloc((deg_info-> Nsamples + DATAPADDING_MSECS * (Fs / 1000)) * sizeof(float)); long i; for (i = 0; i < ref_info-> Nsamples + DATAPADDING_MSECS * (Fs / 1000); i++) { new_ref [i] = ref_info-> data [i]; } for (i = ref_info-> Nsamples + DATAPADDING_MSECS * (Fs / 1000); i < deg_info-> Nsamples + DATAPADDING_MSECS * (Fs / 1000); i++) { new_ref [i] = 0.0f; } safe_free (ref_info-> data); ref_info-> data = new_ref; new_ref = NULL; } else { if (ref_info-> Nsamples > deg_info-> Nsamples) { float *new_deg = (float *) safe_malloc((ref_info-> Nsamples + DATAPADDING_MSECS * (Fs / 1000)) * sizeof(float)); long i; for (i = 0; i < deg_info-> Nsamples + DATAPADDING_MSECS * (Fs / 1000); i++) { new_deg [i] = deg_info-> data [i]; } for (i = deg_info-> Nsamples + DATAPADDING_MSECS * (Fs / 1000); i < ref_info-> Nsamples + DATAPADDING_MSECS * (Fs / 1000); i++) { new_deg [i] = 0.0f; } safe_free (deg_info-> data); deg_info-> data = new_deg; new_deg = NULL; } } } printf (" Acoustic model processing...\n"); pesq_psychoacoustic_model (ref_info, deg_info, err_info, ftmp); safe_free (ref_info-> data); safe_free (ref_info-> VAD); safe_free (ref_info-> logVAD); safe_free (deg_info-> data); safe_free (deg_info-> VAD); safe_free (deg_info-> logVAD); safe_free (ftmp); resultsFile = fopen (ITU_RESULTS_FILE, "at"); if (resultsFile != NULL) { long start, end; if (0 != fseek (resultsFile, 0, SEEK_SET)) { printf ("Could not move to start of results file %s!\n", ITU_RESULTS_FILE); exit (1); } start = ftell (resultsFile); if (0 != fseek (resultsFile, 0, SEEK_END)) { printf ("Could not move to end of results file %s!\n", ITU_RESULTS_FILE); exit (1); } end = ftell (resultsFile); if (start == end) { fprintf (resultsFile, "REFERENCE\t DEGRADED\t PESQMOS\t PESQMOS\t SUBJMOS\t COND\t SAMPLE_FREQ\t CRUDE_DELAY\n"); fflush (resultsFile); } fprintf (resultsFile, "%s\t ", ref_info-> path_name); fprintf (resultsFile, "%s\t ", deg_info-> path_name); fprintf (resultsFile, "SQValue=%.3f\t ", err_info->pesq_mos); fprintf (resultsFile, "%.3f\t ", err_info->pesq_mos); fprintf (resultsFile, "%.3f\t ", err_info->subj_mos); fprintf (resultsFile, "%d\t ", err_info->cond_nr); fprintf (resultsFile, "%d\t", Fs); fprintf (resultsFile, "%.4f\n ", (float) err_info-> Crude_DelayEst / (float) Fs); fclose (resultsFile); } resultsFile = fopen (SIMPLE_RESULTS_FILE, "at"); if (resultsFile != NULL) { long start, end; if (0 != fseek (resultsFile, 0, SEEK_SET)) { printf ("Could not move to start of results file %s!\n", SIMPLE_RESULTS_FILE); exit (1); } start = ftell (resultsFile); if (0 != fseek (resultsFile, 0, SEEK_END)) { printf ("Could not move to end of results file %s!\n", SIMPLE_RESULTS_FILE); exit (1); } end = ftell (resultsFile); if (start == end) { fprintf (resultsFile, "DEGRADED\t PESQMOS\t SUBJMOS\t COND\t SAMPLE_FREQ\t CRUDE_DELAY\n"); fflush (resultsFile); } fprintf (resultsFile, "%s\t ", deg_info-> file_name); fprintf (resultsFile, "%.3f\t ", err_info->pesq_mos); fprintf (resultsFile, "%.3f\t ", err_info->subj_mos); fprintf (resultsFile, "%d\t ", err_info->cond_nr); fprintf (resultsFile, "%d\t", Fs); fprintf (resultsFile, "%.4f\n ", (float) err_info-> Crude_DelayEst / (float) Fs); fclose (resultsFile); } } return; } /* END OF FILE */