diff --git a/egs/gop_speechocean762/README.md b/egs/gop_speechocean762/README.md
index 77b520eadee..1c39f2f1cc6 100644
--- a/egs/gop_speechocean762/README.md
+++ b/egs/gop_speechocean762/README.md
@@ -1,8 +1,3 @@
-There is a copy of this document on Google Docs, which renders the equations better:
-[link](https://docs.google.com/document/d/1pie-PU6u2NZZC_FzocBGGm6mpfBJMiCft9UoG0uA1kA/edit?usp=sharing)
-
-* * *
-
 # GOP on Kaldi
 
 The Goodness of Pronunciation (GOP) is a variation of the posterior probability, for phone level pronunciation scoring.
diff --git a/egs/gop_speechocean762/s5/run.sh b/egs/gop_speechocean762/s5/run.sh
index cf081a18133..989d247736f 100755
--- a/egs/gop_speechocean762/s5/run.sh
+++ b/egs/gop_speechocean762/s5/run.sh
@@ -2,6 +2,7 @@
 
 # Copyright      2019  Junbo Zhang
 #           2020-2021  Xiaomi Corporation (Author: Junbo Zhang, Yongqing Wang)
+#                2024  Jiun-Ting Li (National Taiwan Normal University)
 # Apache 2.0
 
 # This script shows how to calculate Goodness of Pronunciation (GOP) and
@@ -175,6 +176,7 @@ if [ $stage -le 12 ]; then
       compute-gop --phone-map=data/lang_nosp/phone-to-pure-phone.int \
         --skip-phones-string=0:1:2 \
         $model/final.mdl \
+        "ark,t:gunzip -c exp/ali_$part/ali.JOB.gz|" \
         "ark,t:gunzip -c exp/ali_$part/ali-phone.JOB.gz|" \
         "ark:exp/probs_$part/output.JOB.ark" \
         "ark,scp:exp/gop_$part/gop.JOB.ark,exp/gop_$part/gop.JOB.scp" \
diff --git a/src/bin/compute-gop.cc b/src/bin/compute-gop.cc
index a6db0fc0c9e..08847579f85 100644
--- a/src/bin/compute-gop.cc
+++ b/src/bin/compute-gop.cc
@@ -1,6 +1,7 @@
 // bin/compute-gop.cc
 
 // Copyright 2019  Junbo Zhang
+//           2024  Jiun-Ting Li (National Taiwan Normal University)
 
 // See ../../COPYING for clarification regarding multiple authors
 //
@@ -107,11 +108,14 @@ int main(int argc, char *argv[]) {
     const char *usage =
         "Compute Goodness Of Pronunciation (GOP) from a matrix of "
         "probabilities (e.g. from nnet3-compute).\n"
-        "Usage:  compute-gop [options] <model> <alignments-rspecifier> "
+        "Usage:  compute-gop [options] <model> "
+        "<transition-alignments-rspecifier> "
+        "<phoneme-alignments-rspecifier> "
         "<prob-matrix-rspecifier> <gop-wspecifier> "
-        "[<phone-feature-wspecifier>]\n"
+        "<phone-feature-wspecifier>\n"
         "e.g.:\n"
-        " nnet3-compute [args] | compute-gop 1.mdl ark:ali-phone.1 ark:-"
+        " nnet3-compute [args] | compute-gop 1.mdl ark:ali.1 ark:ali-phone.1"
+        " ark:-"  // the probability matrices arrive on stdin through the pipe
         " ark:gop.1 ark:phone-feat.1\n";
 
     ParseOptions po(usage);
@@ -130,16 +134,17 @@ int main(int argc, char *argv[]) {
 
     po.Read(argc, argv);
 
-    if (po.NumArgs() != 4 && po.NumArgs() != 5) {
+    if (po.NumArgs() != 6) {
       po.PrintUsage();
       exit(1);
     }
 
     std::string model_filename = po.GetArg(1),
-                alignments_rspecifier = po.GetArg(2),
-                prob_rspecifier = po.GetArg(3),
-                gop_wspecifier = po.GetArg(4),
-                feat_wspecifier = po.GetArg(5);
+                transition_alignments_rspecifier = po.GetArg(2),
+                phoneme_alignments_rspecifier = po.GetArg(3),
+                prob_rspecifier = po.GetArg(4),
+                gop_wspecifier = po.GetArg(5),
+                feat_wspecifier = po.GetArg(6);
 
     TransitionModel trans_model;
     {
@@ -174,7 +179,8 @@ int main(int argc, char *argv[]) {
       }
     }
 
-    RandomAccessInt32VectorReader alignment_reader(alignments_rspecifier);
+    RandomAccessInt32VectorReader phoneme_alignments_reader(phoneme_alignments_rspecifier);
+    RandomAccessInt32VectorReader transition_alignments_reader(transition_alignments_rspecifier);
     SequentialBaseFloatMatrixReader prob_reader(prob_rspecifier);
     PosteriorWriter gop_writer(gop_wspecifier);
     BaseFloatVectorWriter feat_writer(feat_wspecifier);
@@ -182,25 +188,41 @@ int main(int argc, char *argv[]) {
     int32 num_done = 0;
     for (; !prob_reader.Done(); prob_reader.Next()) {
       std::string key = prob_reader.Key();
-      if (!alignment_reader.HasKey(key)) {
-        KALDI_WARN << "No alignment for utterance " << key;
+      if (!phoneme_alignments_reader.HasKey(key)) {
+        KALDI_WARN << "No phoneme alignment for utterance " << key;
         continue;
       }
-      auto alignment = alignment_reader.Value(key);
+      if (!transition_alignments_reader.HasKey(key)) {
+        KALDI_WARN << "No transition alignment for utterance " << key;
+        continue;
+      }
+      auto phoneme_alignment = phoneme_alignments_reader.Value(key);
+      auto transition_alignment = transition_alignments_reader.Value(key);
       Matrix<BaseFloat> &probs = prob_reader.Value();
       if (log_applied) probs.ApplyExp();
 
+      std::vector<std::vector<int32> > split;
+      SplitToPhones(trans_model, transition_alignment, &split);
+
+      std::vector<int32> phone_boundary;  // phone position of every frame
+      for (size_t i = 0; i < split.size(); i++)
+        phone_boundary.resize(phone_boundary.size() + split[i].size(), i);
+      // phone_boundary is indexed by frame below with a bound taken from the
+      // phoneme alignment, so mismatched alignments must not get past here.
+      KALDI_ASSERT(phone_boundary.size() == phoneme_alignment.size());
+
       Matrix<BaseFloat> lpps;
       ComputeLpps(probs, pdf2phones, &lpps);
 
-      int32 frame_num = alignment.size();
-      if (alignment.size() != probs.NumRows()) {
+      int32 frame_num = phoneme_alignment.size();
+      if (phoneme_alignment.size() != probs.NumRows()) {
         KALDI_WARN << "The frame numbers of alignment and prob are not equal.";
         if (frame_num > probs.NumRows()) frame_num = probs.NumRows();
       }
 
       KALDI_ASSERT(frame_num > 0);
-      int32 cur_phone_id = alignment[0];
+      int32 cur_phone_id = phoneme_alignment[0];
+      int32 cur_phone_pos = phone_boundary[0];
       int32 duration = 0;
       Vector<BaseFloat> phone_level_feat(1 + phone_num * 2);  // [phone LPPs LPRs]
       SubVector<BaseFloat> lpp_part(phone_level_feat, 1, phone_num);
@@ -220,8 +242,9 @@ int main(int argc, char *argv[]) {
         lpp_part.AddVec(1, frame_level_lpp);
         duration++;
 
-        int32 next_phone_id = (i < frame_num - 1) ? alignment[i + 1]: -1;
-        if (next_phone_id != cur_phone_id) {
+        int32 next_phone_id = (i < frame_num - 1) ? phoneme_alignment[i + 1]: -1;
+        int32 next_phone_pos = (i < frame_num - 1) ? phone_boundary[i + 1]: -1;
+        if (next_phone_pos != cur_phone_pos) {
           int32 phone_id = phone_map.empty() ? cur_phone_id : phone_map[cur_phone_id];
 
           // The current phone's feature have been ready
@@ -248,6 +271,7 @@ int main(int argc, char *argv[]) {
           duration = 0;
         }
         cur_phone_id = next_phone_id;
+        cur_phone_pos = next_phone_pos;
       }
 
       // Write GOPs and the GOP-based features