KNN算法[分类算法]

kNN(k-近邻)分类算法的实现

(1) 简介:

(2)算法描述:

(3) 算法实现(PHP代码):

 <?php
/**
 * k-nearest-neighbour (kNN) classifier for categorical attributes.
 *
 * Reads a training table and a test table from space-separated text files,
 * predicts a yes/no label for every test row, writes the completed table to
 * result.txt and prints it.
 *
 * Table layout as used by the code below: row 0 is skipped as a header,
 * column 0 is never compared (presumably an id column), the last column
 * holds the class label, and everything in between is an attribute.
 */

/**
 * Load a space-separated text file into a list of rows.
 *
 * CRLF, LF and CR line endings are all accepted, and blank lines are
 * skipped so that a trailing newline does not produce a bogus empty row.
 *
 * @param string $filename Path of the file to read.
 * @return array List of rows; each row is a list of string fields.
 * @throws RuntimeException If the file cannot be read.
 */
function getFileContent($filename)
{
    $content = file_get_contents($filename);
    if ($content === false) {
        throw new RuntimeException("Cannot read file: {$filename}");
    }

    $rows = [];
    foreach (preg_split('/\r\n|\r|\n/', $content) as $line) {
        if (trim($line) === '') {
            continue;
        }
        $rows[] = explode(' ', $line);
    }

    return $rows;
}

/**
 * Sort a list of [label, score] pairs by score, highest first (Shell sort).
 *
 * Only elements 0 and 1 of every pair are moved. The hand-written sort is
 * kept on purpose: replacing it with a stable library sort would change the
 * order of equally scored pairs and therefore which neighbours get a vote.
 *
 * @param array $array List of [label, score] pairs indexed 0..n-1.
 * @return array The same pairs in descending score order.
 */
function shell_sort($array)
{
    $n = count($array);

    for ($gap = (int) ($n / 2); $gap >= 1; $gap = (int) ($gap / 2)) {
        for ($i = $gap; $i < $n; $i++) {
            $label = $array[$i][0];
            $score = $array[$i][1];

            // Gapped insertion: shift smaller scores towards the end.
            $j = $i - $gap;
            while ($j >= 0 && $array[$j][1] < $score) {
                $array[$j + $gap][0] = $array[$j][0];
                $array[$j + $gap][1] = $array[$j][1];
                $j -= $gap;
            }

            $array[$j + $gap][0] = $label;
            $array[$j + $gap][1] = $score;
        }
    }

    return $array;
}

/**
 * Classify one test row by majority vote among its k nearest training rows.
 *
 * The distance between two rows is the square root of the number of
 * attributes whose values differ; similarity is 1 / (1 + distance).
 * k is floor(sqrt(N)) where N is the number of training samples (the
 * training header row is not counted). Every neighbour whose label is not
 * $flagsyes counts as a "no" vote, and a tie is resolved as $flagsno.
 * With no training samples k is 0 and $flagsno is returned.
 *
 * Values are compared as text, so numeric-looking strings such as "1e3"
 * and "1000" are treated as different categories.
 *
 * @param array $test     One test row; element 0 and the last element are ignored.
 * @param array $train    Training table; row 0 is skipped, the last element of
 *                        each remaining row is its label.
 * @param mixed $flagsyes Label returned for a positive majority.
 * @param mixed $flagsno  Label returned otherwise.
 * @return mixed Either $flagsyes or $flagsno.
 */
function KNN($test, $train, $flagsyes, $flagsno)
{
    // Row 0 of the training table is the header, so it is not a sample.
    $sampleCount = max(0, count($train) - 1);
    // Exclusive upper bound of the attribute columns (last element = label).
    $attrEnd = count($test) - 1;

    $neighbours = [];
    for ($i = 1; $i <= $sampleCount; $i++) {
        $sample = $train[$i];

        $mismatches = 0;
        for ($j = 1; $j < $attrEnd; $j++) {
            // A training row shorter than the test row counts as an empty value.
            $value = isset($sample[$j]) ? (string) $sample[$j] : '';
            if ((string) $test[$j] !== $value) {
                $mismatches++;
            }
        }

        $width = count($sample);
        $label = $width > 0 ? $sample[$width - 1] : null;
        $neighbours[] = [$label, 1 / (1 + sqrt($mismatches))];
    }

    $ranked = shell_sort($neighbours);

    $k = (int) sqrt($sampleCount);
    $yesVotes = 0;
    for ($i = 0; $i < $k; $i++) {
        if ((string) $ranked[$i][0] === (string) $flagsyes) {
            $yesVotes++;
        }
    }

    return $yesVotes > $k - $yesVotes ? $flagsyes : $flagsno;
}

try {
    $train = getFileContent("train.txt");
    $test = getFileContent("test.txt");

    $columnCount = isset($test[0]) ? count($test[0]) : 0;
    $rowCount = count($test);

    // Row 0 is the header; every other row gets its last column predicted.
    for ($i = 1; $i < $rowCount; $i++) {
        // KNN ignores the last element of the row it is given, so rows that
        // are shorter than the header are padded to keep all attributes in play.
        $row = array_pad($test[$i], $columnCount, '');
        // The labels are string literals: bare Y / N are undefined constants.
        $row[$columnCount - 1] = KNN($row, $train, 'Y', 'N');
        $test[$i] = $row;
    }

    // Write the completed table: a tab after every field, CRLF after every row.
    $fp = fopen('result.txt', 'wb');
    if ($fp === false) {
        throw new RuntimeException('Cannot open result.txt for writing');
    }
    foreach ($test as $row) {
        foreach ($row as $field) {
            fwrite($fp, $field . "\t");
        }
        fwrite($fp, "\r\n");
    }
    fclose($fp);

    // Dump the completed table for inspection in the browser.
    echo "<pre>";
    print_r($test);
    echo "</pre>";
} catch (RuntimeException $e) {
    echo "<pre>" . htmlspecialchars($e->getMessage()) . "</pre>";
}
?>