`
sstrive
  • 浏览: 12567 次
  • 性别: Icon_minigender_1
  • 来自: 武汉
最近访客 更多访客>>
社区版块
存档分类

求串s到串t编辑距离并输出一种编辑方法

阅读更多
///要求:求串s到串t编辑距离并输出一种编辑方法。
///编辑距离就是用来计算从原串(s)转换到目标串(t)所需要的最少的插入,删除和替换的数目,在NLP中应用比较广泛。
#include <iostream>
#include <string.h>
using namespace std;
//存储操作信息;
struct OperInfo  
{
	int oper; //操作类型,0:无操作、1:delete,2:insert,3:replace;
	int i;    //当前操作处于源串第i位;
	int j;    //当前操作处于目的串第j位;
	OperInfo *next;//下一个操作;
};

//第五位表示操作0:无操作、1:delete,2:insert,3:replace;
int doInfo[1000][5]={{0,0,0,0,0}};
int cnt=0;
int count=0;
//求三个数中的最小数
int Minimum(int a, int b, int c) 
{       
	int mi;		        
	mi = a;    
	if (b < mi) 
	{    	
		mi = b;     
	}
	if (c < mi)
	{           
		mi = c;      
	}     
	return mi;   
}
		
//计算两个字符串间的编辑距离
//原理请参见:http://www.gdcp.cn/jpkc/sjjg/app/jm/edit_distance/problem.htm
 int getEditDistance(char *s, char *t) {
	    int **d; // matrix
        int n; // length of s
        int m; // length of t
        int i; // iterates through s
        int j; // iterates through t
        char s_i; // ith character of s
        char t_j; // jth character of t
        int cost; // cost
		
        // Step 1
		
        n = strlen(s);
        m = strlen(t);
		d=new int*[n+1];
		for(i=0;i<=n;i++)
		{
			d[i]=new int[m+1];
		}
		
        if (n == 0) {
            return m;
        }
        if (m == 0) {
            return n;
        }
       
		
        // Step 2
		
        for (i = 0; i <= n; i++) {
            d[i][0] = i;
        }
		
        for (j = 0; j <= m; j++) {
            d[0][j] = j;
        }
		
        // Step 3
		
        for (i = 1; i <= n; i++) {
            s_i = s[i-1];
            // Step 4
            for (j = 1; j <= m; j++) {
                t_j = t[j - 1];
                // Step 5
                if (s_i == t_j) {
                    cost = 0;
                } else {
                    cost = 1;
                }
                // Step 6
				//若最后一步为delete操作:则d[i][j]=d[i-1][j]+1;
				//若最后一步为insert操作:则d[i][j]=d[i][j-1]+1;
				//若cost=0,则不操作。
				//若cost=1,则作替换。
				d[i][j] = Minimum(d[i - 1][j] + 1, d[i][j - 1] + 1,
					d[i - 1][j - 1] + cost);
				if (d[i][j]==(d[i - 1][j] + 1))
				{
					doInfo[cnt][0]=i;
					doInfo[cnt][1]=j;
					doInfo[cnt][2]=i-1;
					doInfo[cnt][3]=j;
					doInfo[cnt][4]=1;//delete
					cnt++;
				}
				if (d[i][j]==(d[i][j-1] + 1))
				{
					doInfo[cnt][0]=i;
					doInfo[cnt][1]=j;
					doInfo[cnt][2]=i;
					doInfo[cnt][3]=j-1;
					doInfo[cnt][4]=2;//insert
					cnt++;
				}
				if (d[i][j]==(d[i - 1][j-1] +cost)&&cost==1)
				{
					doInfo[cnt][0]=i;
					doInfo[cnt][1]=j;
					doInfo[cnt][2]=i-1;
					doInfo[cnt][3]=j-1;
					doInfo[cnt][4]=3;//replace
					cnt++;
				}
				if (d[i][j]==(d[i - 1][j-1] +cost)&&cost==0)
				{
					doInfo[cnt][0]=i;
					doInfo[cnt][1]=j;
					doInfo[cnt][2]=i-1;
					doInfo[cnt][3]=j-1;
					doInfo[cnt][4]=0;//no
					cnt++;
				}             
            }
        }
        // Step 7
	   int	result=d[n][m];
	   //释放二维数组占用的空间
	   for (i=0;i<=n;i++)
	   {
		   delete []d[i];
	   }
	   delete []d;
       return result;
		
    }
	//将操作步聚存储到链表中;
   void getOperater(int n,int m,OperInfo *head)
   {
	   int i;
	   for (i=0;i<100;i++)
	   {	
		   
		  if(doInfo[i][0]==0&&doInfo[i][1]==0&&doInfo[i][2]==0&&doInfo[i][3]==0&&doInfo[i][4]==0)
			    break;
		  if (doInfo[i][0]==n&&doInfo[i][1]==m)
		  {
             OperInfo *node=new OperInfo;
			 node->oper=doInfo[i][4];
			 node->i=doInfo[i][0];
			 node->j=doInfo[i][1];
			 node->next=head->next;
			 head->next=node;
			 if (m==0&&n==0)
				 break;
			 getOperater(doInfo[i][2],doInfo[i][3],head);
			 break;//只取一种实现方式。
		  }	
	   }
   }
   void displayDoInfo(OperInfo *head,char *a,char *b) 
   {
	  cout<<"其中的一种操作步骤如下:"<<endl;
      OperInfo *node=head;
	  node=node->next;
	  while(node!=NULL) 
	  {
		  if (node->oper==0)
		  {
			  cout<<"没有操作,直接看下一位"<<endl;
		  }
		  if (node->oper==1)
		  {
			  count++;
			  cout<<"第"<<count<<"步--"<<"delete:"<<a[node->i-1]<<endl;
		  }
		  if (node->oper==2)
		  {
			  count++;
			  cout<<"第"<<count<<"步--"<<"insert:"<<b[node->j-1]<<endl;
		  }
		  if (node->oper==3)
		  {
			  count++;
			  cout<<"第"<<count<<"步--"<<"replace:"<<a[node->i-1]<<"->"<<b[node->j-1]<<endl;
		  }
		  node=node->next;
	  }
	  /*
	  int i,j;
	  	  for (i=0;i<100;i++)
	  	  {
	  		  for (j=0;j<5;j++)
	  		  {
	  			  cout<<doInfo[i][j]<<"  ";
	  		  }
	  		  cout<<endl;
	  	  }  */	  	  
   }

void main()
{
   char *a="aded";
   char *b="acfbe";
   int n=strlen(a);
   int m=strlen(b);
   OperInfo *head=new OperInfo;
   head->next=NULL;
   cout<<a<<"-->"<<b<<"的编辑距离为:"<<getEditDistance(a,b)<<endl;
   getOperater(n,m,head);
   displayDoInfo(head,a,b);
}

 

0
0
分享到:
评论
1 楼 gk23 2009-12-07  
不错

相关推荐

Global site tag (gtag.js) - Google Analytics