“智能”汽车在相同输入下表现不同

问题描述

所以我制作了一个汽车控制器,它可以使用 Input.GetAxis() 从键盘获取输入,并且可以接收模拟 Input.GetAxis() 的数组。我的赛道被分成几个部分,在每个部分,汽车会读取速度的替代输入或转弯的输入。

在开始训练时,起始位置被保存,如果汽车撞墙或只是坐了 10 秒,然后会在起始位置用一组新的输入重新启动。

这是移动汽车的脚本

   public class Movment : MonoBehavIoUr
   {
       [Serializefield]
       private new Rigidbody rigidbody;
   
       [Serializefield]
       private float
           speed,top_speed,turn_speed,gravity_multiplayer;
   
       private bool isTurning = false;
   
       private float turn_dir = 0,drive_dir = 0;
   
       public Individual indv;
       public int current_index;
   
       // Start is called before the first frame update
       void Start()
       {
   
       }
   
       // Update is called once per frame
       void FixedUpdate()
       {
   
           Drive(drive_dir);
           Turn(turn_dir);
           ExtraGravity();
   
   
           //Debug.Log("Speed" + drive_dir.ToString());
           //Debug.Log("Turn" + turn_dir.ToString());
       }
       private void Drive()
       {
           float drive_dir = Input.GetAxisRaw("Vertical");
           rigidbody.AddRelativeForce(new Vector3(Vector3.forward.x,Vector3.forward.z) * speed * drive_dir);
   
           Vector3 localVeLocity = transform.InverseTransformDirection(rigidbody.veLocity);
           localVeLocity.x = 0;
           rigidbody.veLocity = transform.TransformDirection(localVeLocity);
       }
   
       void Turn()
       {
           Vector3 update_rotation = Vector3.zero;
           float turn_dir = Input.GetAxisRaw("Horizontal");
   
           update_rotation = Vector3.up * turn_speed * turn_dir;
           rigidbody.AddTorque(update_rotation);
       }
   
       private void Drive(float network_drive_dir)
       {
           rigidbody.AddRelativeForce(new Vector3(Vector3.forward.x,Vector3.forward.z) * speed * network_drive_dir);
   
           Vector3 localVeLocity = transform.InverseTransformDirection(rigidbody.veLocity);
           localVeLocity.x = 0;
           rigidbody.veLocity = transform.TransformDirection(localVeLocity);
       }
   
       void Turn(float network_turn)
       {
           Vector3 update_rotation = Vector3.zero;
   
           update_rotation = Vector3.up * turn_speed * network_turn;
   
           rigidbody.AddTorque(update_rotation);
       }
   
       public void Swap(bool newInvd)
       {
           if (newInvd)
               current_index = 0;
   
           if (!isTurning)
           {
               turn_dir = indv.turn[current_index / 2];
               //Debug.Log(current_index + "(t): " + turn_dir);
               isTurning = true;
             
           }
           else
           {
               drive_dir = indv.acc[current_index / 2];
               //Debug.Log(current_index + "(a): " + drive_dir);
               isTurning = false;
           }
           current_index++;
       }
   
       private void ExtraGravity()
       {
           rigidbody.AddForce(Vector3.down * gravity_multiplayer);
       }
   
       public void Fresh()
       {
           turn_dir = 0f;
           drive_dir = indv.acc[0];
       }
   }

此脚本检查碰撞、空闲或是否完成轨道重置,并为每个部分切换汽车的输入


    public class EnvObserv : MonoBehavIoUr
    {
        [Serializefield]
        private GameObject[] checkpoints;
    
        private Movment player;
        private Vector3
            pos_start,rot_start;
    
        private float time_start;
        private float last_time_check = 0;
    
        private int no_checkpoints = 0;
        private int last_checkpoint = 0;
    
        private bool forward = true;
        private float best_time = 999999f;
    
    
        // Start is called before the first frame update
        void Awake()
        {
            player = GetComponent<Movment>();
            pos_start = new Vector3(116.9f,4.29f,-96f);
            rot_start = Vector3.zero;
            time_start = Time.time;
        }
    
        // Update is called once per frame
        void Update()
        {
            if(last_time_check > 10f)
                CheckIdle();
    
            last_time_check += Time.deltaTime;
        }
    
        private void OnCollisionEnter(Collision collision)
        {
            if(collision.collider.CompareTag("TrackWall"))
                Reset(false);
        }
    
        private void OnTriggerEnter(Collider other)
        {
            if (other.CompareTag("Checkpoint"))
            {
                float time = Time.time - time_start;
                Save(time);
                //SaveFirstRun(forward);
                Reset(true,time);
            }
            if (other.CompareTag("Swap"))
            {
                no_checkpoints++;
                player.Swap(false);
                other.gameObject.SetActive(false);
                //SaveFirstRun(forward);
                forward = !forward;
            }
        }
    
        private void Reset(bool finished,float time = -1)
        {
            GetComponent<Rigidbody>().veLocity = Vector3.zero;
            GetComponent<Rigidbody>().angularVeLocity = Vector3.zero;
    
            if (!finished)
            {
                player.indv = FindobjectOfType<Population>().GetNext(no_checkpoints,time);
            }
            else
            {
                player.indv = FindobjectOfType<Population>().GetNext(no_checkpoints,time);
            }
    
            player.current_index = 0;
            transform.position = pos_start;
            transform.eulerAngles = rot_start;
            no_checkpoints = 0;
            last_time_check = 0;
            time_start = Time.time;
            GetComponent<Rigidbody>().veLocity = Vector3.zero;
            GetComponent<Rigidbody>().angularVeLocity = Vector3.zero;
            ResetCheckpoints();
            player.Fresh();
        }
    
        void ResetCheckpoints()
        {
            foreach (var check in checkpoints)
            {
                check.SetActive(true);
            }
        }
    
        void CheckIdle()
        {
            if(no_checkpoints - last_checkpoint < 2)
            {
                Reset(false);
            }
    
            last_checkpoint = no_checkpoints;
            last_time_check = 0f;
        }

这个脚本处理进化


public class Population : MonoBehavIoUr
{
    [Serializefield]
    private Movment car;

    [Header("Population variables")]
    public int pop_size = 20;
    public int indv_lenght = 97;
    //public float mutate_prob = 0.01f;
    public int genes_to_mutate = 10;
    public float genes_cross_prob = 0.05f;
    public float deviation = 0.1f;
    public int pos_to_mutate = 4;

    [Header("View")]
    public int current_indv = 0;
    public int current_generation = 1;
    public float best_time_all_generations = 9999f;

    [HideInInspector]
    public List<Individual> population = new List<Individual>();
    // Start is called before the first frame update
    void Awake()
    {
        Init();
        best_time_all_generations = 9999f;
    }

    // Update is called once per frame
    void Update()
    {
    }

    void InitPop()
    {
        for (int i = 0; i < pop_size; i++)
        {
            Individual indv = new Individual();
            indv.Randomize(indv_lenght);
            population.Add(indv);
        }
        StartTrainning();
    }

    void Init()
    {
        Individual first = Readindv("E:/Unity/Projects/MiniRace Sectioned/Assets/Scripts/Initial data.txt");
        population.Add(first.Getcopy());

        population = FillWithMutations(first);

        StartTrainning();
    }

    void StartTrainning()
    {
        car.indv = population[0];
        car.Fresh();
    }

    public Individual GetNext(int fitness,float time)
    {
        if(current_indv == pop_size - 1)
        {
            Repopulate();
            current_indv = 0;
            current_generation++;

            if (current_generation > 1000) Debug.Break();
        }
        else
        {
            population[current_indv].fitness = fitness;
            population[current_indv].time = time;
            current_indv++;
        }

        return population[current_indv];
    }

    void Repopulate()
    {
        List<Individual> new_pop = new List<Individual>();

        new_pop.Add(Best());//CHECKED
        
        while (new_pop.Count < pop_size / 2)
        {
            new_pop.Add(Turnir());
        }//CHECKED

        new_pop = Cross(new_pop);//CHECKED
        new_pop = MutationManager(new_pop);//CHECKED

        population.Clear();
        for (int i = 0; i < new_pop.Count; i++)
        {
            population.Add(new_pop[i]);
            population[i].fitness = 0;
            population[i].time = -1;
        }
    }

    Individual Best()
    {
        Individual best = new Individual();
        float best_time = 999999f;
        int best_value = 0;

        for(int i = 0; i < pop_size; i++)
        {
            if (population[i].fitness > best_value)
            {
                best = population[i].Getcopy();
                best_time = population[i].time;
                best_value = population[i].fitness;
            }
            else if (population[i].time > 0) {
                if (population[i].fitness == best_value && population[i].time <= best_time)
                {
                    best = population[i].Getcopy();
                    best_time = population[i].time;
                    best_value = population[i].fitness;
                }
            }
        }
        if (best_time < best_time_all_generations && best_time > 0)
            best_time_all_generations = best_time;

        Debug.Log(current_generation.ToString() + "  :  " + best_time);
        return best;
    }

    //PICKS 3 RANDOM INDIVIDUALS FROM POPULATION AND RETURNS THE BEST ONE
    Individual Turnir()
    {
        Individual indv1 = new Individual();
        Individual indv2 = new Individual();
        Individual indv3 = new Individual();

        int val1 = Random.Range(0,pop_size - 1);
        int val2 = Random.Range(0,pop_size - 1);
        int val3 = Random.Range(0,pop_size - 1);

        indv1 = population[val1];
        indv2 = population[val2];
        indv3 = population[val3];

        List<Individual> turnir = new List<Individual>();

        turnir.Add(indv1);
        turnir.Add(indv2);
        turnir.Add(indv3);

        int best_val = 0;
        int best_ind = -1;
        float best_time = 99999f;

        for(int i = 0; i < 3; i++)
        {
            if(turnir[i].fitness > best_val)
            {
                best_val = turnir[i].fitness;
                best_ind = i;
                best_time = turnir[i].time;
            }
            else if(turnir[i].fitness == best_val && turnir[i].time <= best_time)
            {
                best_val = turnir[i].fitness;
                best_ind = i;
                best_time = turnir[i].time;
            }
        }
        return turnir[best_ind];    
    }

    
    List<Individual> Cross(List<Individual> pop)
    {
        List<Individual> new_pop = new List<Individual>();

        for(int i = 0; i < pop.Count; i++)
        {
            Individual indv1 = new Individual();
            Individual indv2 = new Individual();
            Individual kid = new Individual();

            indv1 = pop[i].Getcopy();
            indv2 = pop[Random.Range(0,pop.Count - 1)].Getcopy();
            kid.fitness = indv1.fitness;
            kid.time = indv1.time;

            for (int j = 0; j < indv1.acc.Count; j++)
            {
                if (Random.Range(0f,1f) < genes_cross_prob)
                {
                    kid.acc.Add(indv2.acc[j]);
                    kid.turn.Add(indv2.turn[j]);
                }
                else
                {
                    kid.acc.Add(indv1.acc[j]);
                    kid.turn.Add(indv1.turn[j]);
                }
            }

            new_pop.Add(kid);
        }

        for(int i = 0; i < new_pop.Count; i++)
        {
            pop.Add(new_pop[i]);

        }

        return pop;
    }

    //CHECKS IF AN INDIVIDUAL FINISHED OR NOT THE RACE
    //IF IT DIDN'T THEN IT MUTATES LAST 6 GEnes THAT WERE ACTIVE BEFOARE CRASH
    //AND IF IT DID FINISHED THE RACE THEN IT MUTATES 10 RANDOM GEnes IN HOPE THAT IT
    //WILL LEAD TO BETTER TIME
    List<Individual> MutationManager(List<Individual> pop)
    {
        List<Individual> new_pop = new List<Individual>();
        
        for(int i = 0; i < pop.Count; i++)
        {
            if (i > 0)
            {
                if (pop[i].time == -1)
                {
                    new_pop.Add(MutateLastPos(pop[i].Getcopy()));
                }   
                else
                {
                    new_pop.Add(MutateOne(pop[i].Getcopy()));
                }
            }
            else
                new_pop.Add(pop[i].Getcopy());
        }

        return new_pop;
    }

    //The function mutates last x number of genes that were active before crash
    Individual MutateLastPos(Individual indv)
    {
        int last_checkpoint = indv.fitness;
        int local_pos_to_mutate = pos_to_mutate;

        if(last_checkpoint < local_pos_to_mutate)
        {
            local_pos_to_mutate = last_checkpoint;
        }

        last_checkpoint--;
        last_checkpoint /= 2;

        for (int j = 1; j <= local_pos_to_mutate / 2; j++)
        {
            float new_val_acc = indv.acc[last_checkpoint - j] + Random.Range(0,deviation);
            float new_val_turn = indv.turn[last_checkpoint - j] + Random.Range(-deviation,deviation);

            new_val_acc = Mathf.Clamp(new_val_acc,0f,1f);
            new_val_turn = Mathf.Clamp(new_val_turn,-1f,1f);

            indv.acc[last_checkpoint - j] = new_val_acc;
            indv.turn[last_checkpoint - j] = new_val_turn;
        }
        
        return indv;
    }

    //REFACTORED
    List<Individual> FillWithMutations(Individual indv)
    {
        List<Individual> new_pop = new List<Individual>();

        new_pop.Add(indv.Getcopy());

        while(new_pop.Count < pop_size)
        {
            var mutation = MutateOne(indv.Getcopy());
            new_pop.Add(mutation);
        }
        return new_pop;
    }

    //REFACTORED
    //MUTATES A NUMBER OF GEnes FOR THE INDIVIDUAL SENT AS ParaMATER
    Individual MutateOne(Individual indv)
    {
        List<int> genes_to_mutate_acc = new List<int>();
        List<int> genes_to_mutate_turn = new List<int>();

        for(int i = 0; i < genes_to_mutate / 2; i++)
        {
            int val_acc = Random.Range(0,(indv_lenght - 1) / 2);
            int val_turn = Random.Range(0,(indv_lenght - 1) / 2);

            while (genes_to_mutate_acc.Contains(val_acc))
            {
                val_acc = Random.Range(0,(indv_lenght - 1) / 2);
            }

            while (genes_to_mutate_turn.Contains(val_turn))
            {
                val_turn = Random.Range(0,(indv_lenght - 1) / 2);
            }

            genes_to_mutate_acc.Add(val_acc);
            genes_to_mutate_turn.Add(val_turn);
        }

        for(int i = 0; i < genes_to_mutate / 2; i++)
        {
            float acc = indv.acc[genes_to_mutate_acc[i]];
            float turn = indv.turn[genes_to_mutate_turn[i]];

            acc += Random.Range(0,deviation);
            turn += Random.Range(-deviation,deviation);

            acc = Mathf.Clamp(acc,1f);
            turn = Mathf.Clamp(turn,1f);

            indv.acc[genes_to_mutate_acc[i]] = acc;
            indv.turn[genes_to_mutate_turn[i]] = turn;
        }

        indv.fitness = 0;
        indv.time = -1;

        return indv;
    }

    //READ AN INDIVIDUAL FROM A FILE
    // !!!IMPORTATNT!!! THIS GUYS WILL FINISH THE RACE 100% IN THE FirsT GENERATION
    Individual Readindv(string path)
    {
        List<string> lines = new List<string>();
        lines = File.ReadAllLines(path).ToList();

        Individual good_indv = new Individual();

        for(int i = 0; i < lines.Count; i += 2)
        {
            good_indv.acc.Add(float.Parse(lines[i].Substring(2)));
            good_indv.turn.Add(float.Parse(lines[i + 1].Substring(2)));
        }
        return good_indv;
    }


我制作了进化部分,以便第一辆车的信息(如果我第一次玩的话,女巫100%完成赛道)永远不会丢失。确实我检查了它并且所有值都相同但是出于某种奇怪的原因,当脚本达到第 2 代并且第一辆车播放时,即使它的“大脑”中没有更改的值并且来自 EnvObserv 类的重置功能重置,它也会崩溃汽车在零速度/角速度的起始位置。

这是我存储汽车动作的个人类


    public class Individual
    {
        public List<float> acc;
        public List<float> turn;
        public int fitness;
        public float time;
        public Individual()
        {
            acc = new List<float>();
            turn = new List<float>();
            fitness = 0;
            time = 0;
        }
    
        public void Randomize(int size)
        {
            for (int i = 0; i < (size + 1) / 2; i++)
            {
                acc.Add(Random.Range(0f,1f));
                turn.Add(Random.Range(-1f,1f));
    
            }
        }
    
        public Individual Getcopy()
        {
            Individual new_indv = new Individual();
    
            for(int i = 0; i < acc.Count; i++)
            {
                new_indv.acc.Add(acc[i]);
                new_indv.turn.Add(turn[i]);
                new_indv.fitness = fitness;
                new_indv.time = time;
            }
    
            return new_indv;
        }
     }

我 100% 确定问题出在重置功能的某个地方。可能不是从零速度开始,也可能是开始位置有点错误

顺便说一句,我对起始位置进行了硬编码,因为我认为 Unity 可能会以某种方式对其进行舍入,并且我在开始游戏之前对检查器的值进行了硬编码

---------->编辑更新

保存功能是 EnvObserv.Save(),当一个人完成轨道时调用

也许第 0 代和其他代之间有一些小的变化。我无法解释发生这种情况的原因、方式或原因... :/

这是为了更好的视野的地图

Here is the map for better vision

解决方法

暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!

如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。

小编邮箱:dio#foxmail.com (将#修改为@)