Stocks News
Mahalanobis distance with the matrix library of MQ and Beer. – Statistics – December 18
First of all what is it?
Second some documents
Wikipedia page
We need:
- covariance matrix
- Matrix inversion
- matrix multiplication
- vector average
The matrix and vector library of MQL5 actually meets all of these requirements.
Great. We will build a structure that can be prepared once and reused as long as the sample set does not change.
What do you mean by sample set?
A set of observations with attributes.
To simplify, let’s say we have 10 candles (chart candles) and we have an OHLC on them. So there are 10 samples and 4 attributes or 4 features.
Here is an example usage:
// Example: 10 samples (bars) with 4 features each (open, high, low, close).
double open[],high[],low[],close[];
ArrayResize(open,10,0);
ArrayResize(high,10,0);
ArrayResize(low,10,0);
ArrayResize(close,10,0);

// Sample i is read from shift i+1, so shift 0 is not part of the sample set.
for(int i=0;i<10;i++)
  {
   open[i]=iOpen(_Symbol,_Period,i+1);
   high[i]=iHigh(_Symbol,_Period,i+1);
   low[i]=iLow(_Symbol,_Period,i+1);
   close[i]=iClose(_Symbol,_Period,i+1);
  }

// Prepare the structure once: 4 features, 10 samples per feature.
mahalanober M;
M.setup(4,10);
M.fill_feature(0,open);
M.fill_feature(1,high);
M.fill_feature(2,low);
M.fill_feature(3,close);

// Distance of sample 2 from the mean of the whole sample set.
double md=M.distanceOfSampleToDistribution(2);
Print("Mahalabonis Distance of bar 2 to the distribution "+DoubleToString(md,4));

// Distance between two samples, scaled by the same inverse covariance matrix.
md=M.distanceOfSampleToSample(5,0);
Print("Mahalabonis Distance of bar(0) to bar(5) in the distribution "+DoubleToString(md,4));
And here’s the structure
//+------------------------------------------------------------------+
//| Mahalanobis distance helper.                                     |
//| Holds a fixed sample set (total_features x total_samples), the   |
//| per-feature means and the inverse covariance matrix, so that     |
//| distances can be queried repeatedly without recomputing them.    |
//| Usage: setup() -> fill_feature() for every feature -> distance*()|
//+------------------------------------------------------------------+
struct mahalanober
  {
private:
   vector            features[];                // one vector per feature, each holding total_samples values
   bool              filled[];                  // filled[i] is true once feature i has been supplied
   vector            feature_means;             // mean of each feature across all samples
   matrix            covariance_matrix_inverse; // inverse of the feature covariance matrix
   int               total_features,total_samples;

public:
                     mahalanober(void) { reset(); }
                    ~mahalanober(void) { reset(); }

   // Drops all stored data and returns the structure to its empty state.
   void              reset()
     {
      total_features=0;
      total_samples=0;
      ArrayFree(features);
      ArrayFree(filled);
      feature_means.Init(0);
      covariance_matrix_inverse.Init(0,0);
     }

   // Allocates storage for _total_features features of _total_samples values each
   // and marks every feature as not yet filled.
   void              setup(int _total_features,int _total_samples)
     {
      total_features=_total_features;
      total_samples=_total_samples;
      ArrayResize(features,total_features,0);
      ArrayResize(filled,total_features,0);
      ArrayFill(filled,0,total_features,false);
      feature_means.Init(total_features);
      for(int i=0;i<ArraySize(features);i++)
        {
         features[i].Init(total_samples);
        }
     }

   // Stores the values of one feature across all samples.
   // which_feature_ix      : feature index, 0 .. total_features-1
   // values_across_samples : array that must contain exactly total_samples values
   // Returns true when the values were stored; otherwise prints the reason and returns false.
   // Once the last missing feature is supplied, the inverse covariance matrix is (re)computed.
   bool              fill_feature(int which_feature_ix,double &values_across_samples[])
     {
      // The lower bound is checked as well: a negative index would otherwise pass the size test.
      if(which_feature_ix<0 || which_feature_ix>=ArraySize(features))
        {
         Print("MHLNB::fill_feature::Feature("+IntegerToString(which_feature_ix)+") does not exist");
         return(false);
        }
      if(ArraySize(values_across_samples)!=total_samples)
        {
         Print("MHLNB::fill_feature::Amount of values does not match total samples");
         return(false);
        }
      for(int i=0;i<total_samples;i++)
        {
         features[which_feature_ix][i]=values_across_samples[i];
        }
      feature_means[which_feature_ix]=features[which_feature_ix].Mean();
      filled[which_feature_ix]=true;
      if(all_filled())
        {
         calculate_inverse_covariance_matrix();
        }
      return(true);
     }

   // Distance of one stored sample from the feature means:
   // sqrt( (x-mean)' * inverse_covariance * (x-mean) ).
   // Returns 0.0 (after printing a message) when a feature is unfilled or the sample index is invalid.
   double            distanceOfSampleToDistribution(int which_sample)
     {
      if(!all_filled())
        {
         list_unfilled("distanceOfSampleToDistribution()");
         return(0.0);
        }
      if(which_sample<0 || which_sample>=total_samples)
        {
         Print("MLHNB::distanceOfSampleToDistribution()::Sample ("+IntegerToString(which_sample)+") does not exist returning 0.0");
         return(0.0);
        }
      // Column vector of deviations from the mean, one row per feature.
      matrix term0;
      term0.Init(total_features,1);
      for(int i=0;i<total_features;i++)
        {
         term0[i][0]=features[i][which_sample]-feature_means[i];
        }
      matrix term3=term0;
      matrix term1;
      term1=term0.Transpose();
      matrix term2=term1.MatMul(covariance_matrix_inverse);
      matrix last_term=term2.MatMul(term3); // 1x1 result
      return(MathSqrt(last_term[0][0]));
     }

   // Distance between two stored samples, using the same inverse covariance matrix:
   // sqrt( (a-b)' * inverse_covariance * (a-b) ).
   // Returns 0.0 (after printing a message) when a feature is unfilled or an index is invalid.
   double            distanceOfSampleToSample(int sample_a,int sample_b)
     {
      if(!all_filled())
        {
         list_unfilled("distanceOfSampleToSample()");
         return(0.0);
        }
      if(sample_a<0 || sample_a>=total_samples)
        {
         Print("MLHNB::distanceOfSampleToSample()::Sample ("+IntegerToString(sample_a)+") does not exist returning 0.0");
         return(0.0);
        }
      if(sample_b<0 || sample_b>=total_samples)
        {
         Print("MLHNB::distanceOfSampleToSample()::Sample ("+IntegerToString(sample_b)+") does not exist returning 0.0");
         return(0.0);
        }
      // Column vector of per-feature differences between the two samples.
      matrix term0;
      term0.Init(total_features,1);
      for(int i=0;i<total_features;i++)
        {
         term0[i][0]=features[i][sample_a]-features[i][sample_b];
        }
      matrix term3=term0;
      matrix term1;
      term1=term0.Transpose();
      matrix term2=term1.MatMul(covariance_matrix_inverse);
      matrix last_term=term2.MatMul(term3); // 1x1 result
      return(MathSqrt(last_term[0][0]));
     }

private:
   // Builds a samples-by-features matrix from the stored features and caches
   // the inverse of its covariance matrix.
   void              calculate_inverse_covariance_matrix()
     {
      matrix samples_by_features;
      samples_by_features.Init(total_samples,total_features);
      for(int f=0;f<total_features;f++)
        {
         for(int s=0;s<total_samples;s++)
           {
            samples_by_features[s][f]=features[f][s];
           }
        }
      // rowvar=false: per the MQL5 docs, columns are then the variables (features)
      // and rows the observations (samples).
      matrix covariance_matrix=samples_by_features.Cov(false);
      // NOTE(review): Inv() is applied without checking that the covariance matrix
      // is invertible - confirm how a singular sample set should be handled.
      covariance_matrix_inverse=covariance_matrix.Inv();
     }

   // True only when at least one feature is configured and every feature has been filled.
   bool              all_filled()
     {
      if(total_features>0)
        {
         for(int i=0;i<total_features;i++)
           {
            if(!filled[i])
               return(false);
           }
         return(true);
        }
      return(false);
     }

   // Prints one message per unfilled feature; fx is the name of the calling function.
   void              list_unfilled(string fx)
     {
      for(int i=0;i<total_features;i++)
        {
         if(!filled[i])
            Print("MLHNB::"+fx+"::Feature("+IntegerToString(i)+") is not filled!");
        }
     }
  };
Please let me know if you find any mistakes.
Cheers