diff --git a/.github/workflows/auto-update.yml b/.github/workflows/auto-update.yml new file mode 100644 index 0000000..5dbe7ff --- /dev/null +++ b/.github/workflows/auto-update.yml @@ -0,0 +1,128 @@ +name: Regular Auto Update + +env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true + +on: + schedule: + # 每两周的周一凌晨2点运行 + - cron: '0 2 * * 1/2' + workflow_dispatch: + +permissions: + contents: write + pull-requests: write + +concurrency: + group: auto-scrape + cancel-in-progress: false + +jobs: + scrape-commit: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v5 + with: + submodules: recursive + + - name: Setup Python + uses: actions/setup-python@v6 + with: + python-version: '3.11' + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Detect scraper directory + run: | + if [ -x "./sp" ]; then + echo "SCRAPER_DIR=." >> "$GITHUB_ENV" + elif [ -x "./dblp-publications-scraper/sp" ]; then + echo "SCRAPER_DIR=./dblp-publications-scraper" >> "$GITHUB_ENV" + else + echo "Cannot find sp launcher. Checked ./sp and ./dblp-publications-scraper/sp" + ls -la + exit 1 + fi + + - name: Install scraper dependencies + working-directory: ${{ env.SCRAPER_DIR }} + run: | + chmod +x ./sp + ./sp i + + - name: Run scraper + working-directory: ${{ env.SCRAPER_DIR }} + env: + VENUE_SHORT_LLM_API_KEY: ${{ secrets.VENUE_SHORT_LLM_API_KEY }} + run: | + CONFIG_FILE="config.github.json" + if [ ! -f "$CONFIG_FILE" ]; then + CONFIG_FILE="config.json" + fi + + if [ ! 
-f "$CONFIG_FILE" ]; then + echo "No config file found in $PWD" + ls -la + exit 1 + fi + + CI_CONFIG_FILE="config.ci.json" + python - <<'PY' + import json + + source_file = "config.github.json" + try: + with open(source_file, "r", encoding="utf-8") as f: + cfg = json.load(f) + except FileNotFoundError: + source_file = "config.json" + with open(source_file, "r", encoding="utf-8") as f: + cfg = json.load(f) + + cfg["existing_js_path"] = "../collection/auto-collected/auto_collected.js" + + with open("config.ci.json", "w", encoding="utf-8") as f: + json.dump(cfg, f, ensure_ascii=False, indent=2) + + print(f"Prepared CI config from: {source_file}") + PY + + echo "Using config: $CI_CONFIG_FILE" + ./sp --config "$CI_CONFIG_FILE" + + + + - name: Check for changes to commit + id: check_changes + run: | + git add -A + if git diff --cached --quiet; then + echo "changes_detected=false" >> $GITHUB_OUTPUT + echo "No changes to commit, skipping PR." + exit 0 + else + echo "changes_detected=true" >> $GITHUB_OUTPUT + fi + + - name: Create pull request for updates + if: steps.check_changes.outputs.changes_detected == 'true' + uses: peter-evans/create-pull-request@v7 + with: + commit-message: "chore: auto update publications" + branch: "chore/auto-scrape-updates" + delete-branch: true + title: "chore: auto update publications" + body: | + This PR is automatically generated by the Auto Scrape workflow. 
+ + Changes include: + - refreshed auto-collected publications + - updated bundled JSON assets + labels: | + automation + publications diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..8b97dcb --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "dblp-publications-scraper"] + path = dblp-publications-scraper + url = https://github.com/imethanguo/dblp-publications-scraper.git diff --git a/collection/auto_collected.js b/collection/auto-collected/auto_collected.js similarity index 99% rename from collection/auto_collected.js rename to collection/auto-collected/auto_collected.js index 535032d..141fa03 100644 --- a/collection/auto_collected.js +++ b/collection/auto-collected/auto_collected.js @@ -998,4 +998,4 @@ module.exports = [ "bibtex": "@inproceedings{DBLP:conf/kbse/LiuWZ00C21,\n author = {Lu Liu and\n Lili Wei and\n Wuqi Zhang and\n Ming Wen and\n Yepang Liu and\n Shing{-}Chi Cheung},\n title = {Characterizing Transaction-Reverting Statements in Ethereum Smart\n Contracts},\n booktitle = {36th {IEEE/ACM} International Conference on Automated Software Engineering,\n {ASE} 2021, Melbourne, Australia, November 15-19, 2021},\n pages = {630--641},\n publisher = {{IEEE}},\n year = {2021},\n url = {https://doi.org/10.1109/ASE51524.2021.9678597},\n doi = {10.1109/ASE51524.2021.9678597},\n timestamp = {Fri, 16 May 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/conf/kbse/LiuWZ00C21.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}", "awards": [] } -] +] \ No newline at end of file diff --git a/dblp-publications-scraper b/dblp-publications-scraper new file mode 160000 index 0000000..00b8dd8 --- /dev/null +++ b/dblp-publications-scraper @@ -0,0 +1 @@ +Subproject commit 00b8dd8ebb75c36fa46083456dc05deb96646cae diff --git a/public/bundle.json b/public/bundle.json index 5b666c7..4b74b8b 100644 --- a/public/bundle.json +++ b/public/bundle.json @@ -1 +1 @@ -[{"title":"A study on prompt design, advantages and 
limitations of ChatGPT for deep learning program repair.","date":"2025","authors":["Jialun Cao","Meiziniu Li","Ming Wen","Shing-Chi Cheung"],"venue":"Automated Software Engineering","venueShort":"ASEJ","abstract":"The emergence of large language models (LLMs) such as ChatGPT has revolutionized many fields. In particular, recent advances in LLMs have triggered various studies examining the use of these models for software development tasks, such as program repair, code understanding, and code generation. Prior studies have shown the capability of ChatGPT in repairing conventional programs. However, debugging deep learning (DL) programs poses unique challenges since the decision logic is not directly encoded in the source code. This requires LLMs to not only parse the source code syntactically but also understand the intention of DL programs. Therefore, ChatGPT’s capability in repairing DL programs remains unknown. To fill this gap, our study aims to answer three research questions: (1) Can ChatGPT debug DL programs effectively? (2) How can ChatGPT’s repair performance be improved by prompting? (3) In which way can dialogue help facilitate the repair? Our study analyzes the typical information that is useful for prompt design and suggests enhanced prompt templates that are more efficient for repairing DL programs. On top of them, we summarize the dual perspectives (i.e., advantages and disadvantages) of ChatGPT’s ability, such as its handling of API misuse and recommendation, and its shortcomings in identifying default parameters. Our findings indicate that ChatGPT has the potential to repair DL programs effectively and that prompt engineering and dialogue can further improve its performance by providing more code intention. 
We also identified the key intentions that can enhance ChatGPT’s program repairing capability.","tags":["LLM","Program Repair","Deep Learning","Empirical study"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1007/s10515-025-00492-x","bibtex":"@article{DBLP:journals/ase/CaoLWC25,\n author = {Jialun Cao and\n Meiziniu Li and\n Ming Wen and\n Shing{-}Chi Cheung},\n title = {A study on prompt design, advantages and limitations of ChatGPT for\n deep learning program repair},\n journal = {Autom. Softw. Eng.},\n volume = {32},\n number = {1},\n pages = {30},\n year = {2025},\n url = {https://doi.org/10.1007/s10515-025-00492-x},\n doi = {10.1007/S10515-025-00492-X},\n timestamp = {Sun, 15 Jun 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/ase/CaoLWC25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"How far are app secrets from being stolen? a case study on android.","date":"2025","authors":["Lili Wei","Heqing Huang","Shing-Chi Cheung","Kevin Li"],"venue":"Empirical Software Engineering","venueShort":"EMSE","abstract":"Android apps can hold secret strings of themselves such as cloud service credentials or encryption keys. Leakage of such secret strings can induce unprecedented consequences like monetary losses or leakage of user private information. In practice, various security issues were reported because many apps failed to protect their secrets. However, litte is known about the types, usages, exploitability, and consequences of app secret leakage issues. While a large body of literature has been devoted to studying user private information leakage, there is no systematic study characterizing app secret leakage issues. How far are Android app secrets from being stolen? 
To bridge this gap, we conducted the first systematic study to characterize app secret leakage issues in Android apps based on 575 potential app secrets sampled from 14,665 popular Android apps on Google Play. We summarized the common categories of leaked app secrets, assessed their security impacts and disclosed app bad practices in storing app secrets. We devised a text mining strategy using regular expressions and demonstrated that numerous app secrets can be easily stolen, even from the highly popular Android apps on Google. In a follow-up study, we harvested 3,711 distinct exploitable app secrets through automatic analysis. Our findings highlight the prevalence of this problem and call for greater attention to app secret protection.","tags":["Android","Security","Empirical study","regular expression"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1007/s10664-024-10607-9","bibtex":"@article{DBLP:journals/ese/WeiHCL25,\n author = {Lili Wei and\n Heqing Huang and\n Shing{-}Chi Cheung and\n Kevin Li},\n title = {How far are app secrets from being stolen? a case study on android},\n journal = {Empir. Softw. Eng.},\n volume = {30},\n number = {3},\n pages = {90},\n year = {2025},\n url = {https://doi.org/10.1007/s10664-024-10607-9},\n doi = {10.1007/S10664-024-10607-9},\n timestamp = {Fri, 16 May 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/ese/WeiHCL25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"On state reverting in solidity smart contracts: Developer practices, fault categorization, and tool evaluation.","date":"2025","authors":["Lu Liu","Lili Wei","Wuqi Zhang","Shuqing Li","Yifan Zhou","Yepang Liu","Shing-Chi Cheung","Michael R. Lyu"],"venue":"Empirical Software Engineering","venueShort":"EMSE","abstract":"Smart contracts are computer programs deployed on blockchains to facilitate transactions. 
A critical aspect of smart contract security is the use of state-reverting statements (e.g., require, if...revert, if...throw). These statements protect transactions from abnormal behaviors or malicious attacks by reverting a contract to its previous state when certain input constraints or security properties are violated. While essential, the correct use of these state-reverting (SR) statements is nontrivial. Improper use can lead to security vulnerabilities, resulting in substantial financial losses or other severe consequences. It is, therefore, highly important to understand developers’ practices of state reverting in smart contracts and the common mistakes they make. To achieve this goal, we conduct the first comprehensive empirical study on the use of SR statements and their related faults in Solidity smart contracts. First, we analyze the prevalence and purposes of SR statements in 21,414 verified contracts from popular decentralized applications (dapps) and manually examine 381 SR statements, leading to a taxonomy of their uses. Second, we collect 320 real-world state-reverting faults (SR faults) from open-source projects on GitHub and audit reports on Code4rena. We categorize the SR faults into 17 types and summarize 12 distinct fixing strategies. This knowledge can help researchers and practitioners to better understand the common usages of SR statements and learn how to prevent or cope with SR faults. Lastly, the variety of SR fault types and the presence of high-risk issues highlight the need for automated tools to identify and mitigate these faults. This further motivates us to assess the SR fault detection performance of state-of-the-art security analyzers, with the aim of understanding their capability and identifying their deficiencies. 
Via evaluating 12 representative tools on a benchmark comprising 243 contracts with six types of SR faults and the corresponding patched versions, we observe that existing tools exhibit limited capabilities in detecting SR faults (the average detection rate is 14.4%). This result underscores the need for more advanced security analysis tools specifically tailored for SR faults. To facilitate the development of such tools, we further provide a comprehensive analysis of three common limitations of existing tools.","tags":["Smart Contracts","Empirical study","Security","Vulnerability"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1007/s10664-025-10685-3","bibtex":"@article{DBLP:journals/ese/LiuWZLZLCL25,\n author = {Lu Liu and\n Lili Wei and\n Wuqi Zhang and\n Shuqing Li and\n Yifan Zhou and\n Yepang Liu and\n Shing{-}Chi Cheung and\n Michael R. Lyu},\n title = {On state reverting in solidity smart contracts: Developer practices,\n fault categorization, and tool evaluation},\n journal = {Empir. Softw. Eng.},\n volume = {30},\n number = {5},\n pages = {141},\n year = {2025},\n url = {https://doi.org/10.1007/s10664-025-10685-3},\n doi = {10.1007/S10664-025-10685-3},\n timestamp = {Tue, 05 Aug 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/ese/LiuWZLZLCL25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Understanding and Characterizing Mock Assertions in Unit Tests.","date":"2025","authors":["Hengcheng Zhu","Valerio Terragni","Lili Wei","Shing-Chi Cheung","Jiarong Wu","Yepang Liu"],"venue":"Proceedings of the ACM on Software Engineering","venueShort":"FSE","abstract":"Mock assertions provide developers with a powerful means to validate program behaviors that are unobservable to test assertions. Despite their significance, they are rarely considered by automated test generation techniques. 
Effective generation of mock assertions requires understanding how they are used in practice. Although previous studies highlighted the importance of mock assertions, none provide insight into their usages. To bridge this gap, we conducted the first empirical study on mock assertions, examining their adoption, the characteristics of the verified method invocations, and their effectiveness in fault detection. Our analysis of 4,652 test cases from 11 popular Java projects reveals that mock assertions are mostly applied to validating specific kinds of method calls, such as those interacting with external resources and those reflecting whether a certain code path was traversed in systems under test. Additionally, we find that mock assertions complement traditional test assertions by ensuring the desired side effects have been produced, validating control flow logic, and checking internal computation results. Our findings contribute to a better understanding of mock assertion usages and provide a foundation for future related research such as automated test generation that support mock assertions.","tags":["Mocking","Empirical study","Unit Test","Java"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3715741","bibtex":"@article{DBLP:journals/pacmse/ZhuTWCWL25,\n author = {Hengcheng Zhu and\n Valerio Terragni and\n Lili Wei and\n Shing{-}Chi Cheung and\n Jiarong Wu and\n Yepang Liu},\n title = {Understanding and Characterizing Mock Assertions in Unit Tests},\n journal = {Proc. {ACM} Softw. 
Eng.},\n volume = {2},\n number = {{FSE}},\n pages = {554--575},\n year = {2025},\n url = {https://doi.org/10.1145/3715741},\n doi = {10.1145/3715741},\n timestamp = {Sat, 06 Sep 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/pacmse/ZhuTWCWL25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"SemBIC: Semantic-Aware Identification of Bug-Inducing Commits.","date":"2025","authors":["Xiao Chen","Hengcheng Zhu","Jialun Cao","Ming Wen","Shing-Chi Cheung"],"venue":"Proceedings of the ACM on Software Engineering","venueShort":"FSE","abstract":"Debugging can be much facilitated if one can identify the evolution commit that introduced the bug leading to a detected failure (aka. bug-inducing commit, BIC). Although one may, in theory, locate BICs by executing the detected failing test on various historical commit versions, it is impractical when the test cannot be executed on some of those versions. On the other hand, existing static techniques often assume the availability of additional information such as patches and bug reports, or the applicability of predefined heuristics like commit chronology. However, these approaches are ineffective when such assumptions do not hold, which are often the case in practice. To address these limitations, we propose SEMBIC to identify the BIC of a bug by statically tracking the semantic changes in the execution path prescribed by the failing test across successive historical commit versions. Our insight is that the greater the semantic changes a commit introduces concerning the failing execution path of a target bug, the more likely it is to be the BIC. To distill semantic changes relevant to the failure, we focus on three fine-grained semantic properties. We evaluate the performance of SEMBIC on a benchmark containing 199 real-world bugs from 12 open-source projects. 
We found that SEMBIC can identify BICs with high accuracy – it ranks the BIC as top 1 for 88 out of 199 bugs, and achieves an MRR of 0.520, outperforming the state-of-the-art technique by 29.4% and 13.6%, respectively.","tags":["Bug Detection","Program Analysis","Empirical study","Testing"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3715781","bibtex":"@article{DBLP:journals/pacmse/ChenZCWC25,\n author = {Xiao Chen and\n Hengcheng Zhu and\n Jialun Cao and\n Ming Wen and\n Shing{-}Chi Cheung},\n title = {SemBIC: Semantic-Aware Identification of Bug-Inducing Commits},\n journal = {Proc. {ACM} Softw. Eng.},\n volume = {2},\n number = {{FSE}},\n pages = {1363--1385},\n year = {2025},\n url = {https://doi.org/10.1145/3715781},\n doi = {10.1145/3715781},\n timestamp = {Mon, 23 Mar 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/journals/pacmse/ChenZCWC25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"An Empirical Study of Bugs in Data Visualization Libraries.","date":"2025","authors":["Weiqi Lu","Yongqiang Tian","Xiaohan Zhong","Haoyang Ma","Zhenyang Xu","Shing-Chi Cheung","Chengnian Sun"],"venue":"Proceedings of the ACM on Software Engineering","venueShort":"FSE","abstract":"Data visualization (DataViz) libraries play a crucial role in presentation, data analysis, and application development, underscoring the importance of their accuracy in transforming data into visual representations. Incorrect visualizations can adversely impact user experience, distort information conveyance, and influence user perception and decision-making processes. Visual bugs in these libraries can be particularly insidious as they may not cause obvious errors like crashes, but instead mislead users of the underlying data graphically, resulting in wrong decision making. 
Consequently, a good understanding of the unique characteristics of bugs in DataViz libraries is essential for researchers and developers to detect and fix bugs in DataViz libraries. This study presents the first comprehensive analysis of bugs in DataViz libraries, examining 564 bugs collected from five widely-used libraries. Our study systematically analyzes their symptoms and root causes, and provides a detailed taxonomy. We found that incorrect/inaccurate plots are pervasive in DataViz libraries and incorrect graphic computation is the major root cause, which necessitates further automated testing methods for DataViz libraries. Moreover, we identified eight key steps to trigger such bugs and two test oracles specific to DataViz libraries, which may inspire future research in designing effective automated testing techniques. Furthermore, with the recent advancements in Vision Language Models (VLMs), we explored the feasibility of applying these models to detect incorrect/inaccurate plots. The results show that the effectiveness of VLMs in bug detection varies from 29% to 57%, depending on the prompts, and adding more information in prompts does not necessarily increase the effectiveness. Our findings offer valuable insights into the nature and patterns of bugs in DataViz libraries, providing a foundation for developers and researchers to improve library reliability, and ultimately benefit more accurate and reliable data visualizations across various domains.","tags":["Data Visualization","Bug Detection","Empirical study","Vision Language Models"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3729363","bibtex":"@article{DBLP:journals/pacmse/LuTZMXCS25,\n author = {Weiqi Lu and\n Yongqiang Tian and\n Xiaohan Zhong and\n Haoyang Ma and\n Zhenyang Xu and\n Shing{-}Chi Cheung and\n Chengnian Sun},\n title = {An Empirical Study of Bugs in Data Visualization Libraries},\n journal = {Proc. {ACM} Softw. 
Eng.},\n volume = {2},\n number = {{FSE}},\n pages = {2075--2098},\n year = {2025},\n url = {https://doi.org/10.1145/3729363},\n doi = {10.1145/3729363},\n timestamp = {Sat, 06 Sep 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/pacmse/LuTZMXCS25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Question Selection for Multimodal Code Search Synthesis Using Probabilistic Version Spaces.","date":"2025","authors":["Jiarong Wu","Yanyan Jiang","Lili Wei","Congying Xu","Shing-Chi Cheung","Chang Xu"],"venue":"IEEE Transactions on Software Engineering","venueShort":"TSE","abstract":"Searching the occurrences of specific code patterns (code search) is a common task in software engineering, and programming by example (PBE) techniques have been applied to ease customizing code patterns. However, previous PBE tools only synthesize programs meeting the input-output examples, which may not always align with the user intent. To bridge this gap, this paper proposes Excalibur, a multi-modal (example and natural language description) and interactive synthesizer for code search. Excalibur ensures that the generated programs are correct for the provided examples (soundness) and include the user-intended program (bounded completeness). Furthermore, Excalibur helps the user identify the user-intended program through question-answer interaction. To minimize the required interaction efforts, question selection is crucial. To improve question selection for code search, we propose probabilistic version spaces (ProbVS), in which the user-intended program’s probability is high and others are low. ProbVS combines traditional version spaces for compactly representing extensive programs and large language models (on the user-provided natural language description) for adjusting programs’ probabilities to align with users’ intents. 
Extensive experiments on a benchmark of 44 tasks demonstrated the effectiveness of Excalibur and ProbVS and demystified how ProbVS affects probability distributions and how the configurable parameters affect ProbVS.","tags":["Programming by Example","Program Synthesis","Code Search","LLM"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/TSE.2025.3565387","bibtex":"@article{DBLP:journals/tse/WuJWXCX25,\n author = {Jiarong Wu and\n Yanyan Jiang and\n Lili Wei and\n Congying Xu and\n Shing{-}Chi Cheung and\n Chang Xu},\n title = {Question Selection for Multimodal Code Search Synthesis Using Probabilistic\n Version Spaces},\n journal = {{IEEE} Trans. Software Eng.},\n volume = {51},\n number = {6},\n pages = {1724--1744},\n year = {2025},\n url = {https://doi.org/10.1109/TSE.2025.3565387},\n doi = {10.1109/TSE.2025.3565387},\n timestamp = {Sun, 06 Jul 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/tse/WuJWXCX25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"DOMAINEVAL: An Auto-Constructed Benchmark for Multi-Domain Code Generation.","date":"2025","authors":["Qiming Zhu","Jialun Cao","Yaojie Lu","Hongyu Lin","Xianpei Han","Le Sun","Shing-Chi Cheung"],"venue":"Thirty-Ninth AAAI Conference on Artificial Intelligence","venueShort":"AAAI","abstract":"Code benchmarks such as HumanEval are widely adopted to evaluate the capabilities of Large Language Models (LLMs), providing insights into their strengths and weaknesses. However, current benchmarks primarily exercise LLMs' capability on common coding tasks (e.g., bubble sort, greatest common divisor), leaving domain-specific coding tasks (e.g., computation, system, cryptography) unexplored. To fill this gap, we propose a multi-domain code benchmark, DOMAINEVAL, designed to evaluate LLMs' coding capabilities thoroughly. 
Our pipeline works in a fully automated manner, enabling a push-button construction from code repositories into formatted subjects under study. Interesting findings are observed by evaluating 12 representative LLMs against DOMAINEVAL. We notice that LLMs are generally good at computation tasks while falling short on cryptography and system coding tasks. The performance gap can be as much as 68.94% (80.94% - 12.0%) in some LLMs. We also observe that generating more samples can increase the overall performance of LLMs, while the domain bias may even increase. The contributions of this study include a code generation benchmark dataset DOMAINEVAL, encompassing six popular domains, a fully automated pipeline for constructing code benchmarks, and an identification of the limitations of LLMs in code generation tasks based on their performance on DOMAINEVAL, providing directions for future research improvements.","tags":["LLM","Benchmark","Code Generation","Empirical study"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1609/aaai.v39i24.34811","bibtex":"@inproceedings{DBLP:conf/aaai/ZhuC0LH0C25,\n author = {Qiming Zhu and\n Jialun Cao and\n Yaojie Lu and\n Hongyu Lin and\n Xianpei Han and\n Le Sun and\n Shing{-}Chi Cheung},\n editor = {Toby Walsh and\n Julie Shah and\n Zico Kolter},\n title = {{DOMAINEVAL:} An Auto-Constructed Benchmark for Multi-Domain Code\n Generation},\n booktitle = {Thirty-Ninth {AAAI} Conference on Artificial Intelligence, Thirty-Seventh\n Conference on Innovative Applications of Artificial Intelligence,\n Fifteenth Symposium on Educational Advances in Artificial Intelligence,\n {AAAI} 2025, Philadelphia, PA, USA, February 25 - March 4, 2025},\n pages = {26148--26156},\n publisher = {{AAAI} Press},\n year = {2025},\n url = {https://doi.org/10.1609/aaai.v39i24.34811},\n doi = {10.1609/AAAI.V39I24.34811},\n timestamp = {Wed, 18 Mar 2026 17:07:12 +0100},\n biburl = {https://dblp.org/rec/conf/aaai/ZhuC0LH0C25.bib},\n bibsource = {dblp computer science 
bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"CRUXEVAL-X: A Benchmark for Multilingual Code Reasoning, Understanding and Execution.","date":"2025","authors":["Ruiyang Xu","Jialun Cao","Yaojie Lu","Ming Wen","Hongyu Lin","Xianpei Han","Ben He","Shing-Chi Cheung","Le Sun"],"venue":"the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","venueShort":"ACL","abstract":"Ruiyang Xu, Jialun Cao, Yaojie Lu, Ming Wen, Hongyu Lin, Xianpei Han, Ben He, Shing-Chi Cheung, Le Sun. Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2025.","tags":["LLM","Program Synthesis","Natural Language Processing","Empirical Study"],"arxivUrl":null,"paperUrl":"https://aclanthology.org/2025.acl-long.1158/","bibtex":"@inproceedings{DBLP:conf/acl/XuC00LHHC025,\n author = {Ruiyang Xu and\n Jialun Cao and\n Yaojie Lu and\n Ming Wen and\n Hongyu Lin and\n Xianpei Han and\n Ben He and\n Shing{-}Chi Cheung and\n Le Sun},\n editor = {Wanxiang Che and\n Joyce Nabende and\n Ekaterina Shutova and\n Mohammad Taher Pilehvar},\n title = {{CRUXEVAL-X:} {A} Benchmark for Multilingual Code Reasoning, Understanding\n and Execution},\n booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational\n Linguistics (Volume 1: Long Papers), {ACL} 2025, Vienna, Austria,\n July 27 - August 1, 2025},\n pages = {23762--23779},\n publisher = {Association for Computational Linguistics},\n year = {2025},\n url = {https://aclanthology.org/2025.acl-long.1158/},\n timestamp = {Sun, 02 Nov 2025 21:27:24 +0100},\n biburl = {https://dblp.org/rec/conf/acl/XuC00LHHC025.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"From Informal to Formal - Incorporating and Evaluating LLMs on Natural Language Requirements to Verifiable Formal Proofs.","date":"2025","authors":["Jialun 
Cao","Yaojie Lu","Meiziniu Li","Haoyang Ma","Haokun Li","Mengda He","Cheng Wen","Le Sun","Hongyu Zhang","Shengchao Qin","Shing-Chi Cheung","Cong Tian"],"venue":"the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","venueShort":"ACL","abstract":"Jialun Cao, Yaojie Lu, Meiziniu Li, Haoyang Ma, Haokun Li, Mengda He, Cheng Wen, Le Sun, Hongyu Zhang, Shengchao Qin, Shing-Chi Cheung, Cong Tian. Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2025.","tags":["LLM","Program Synthesis","Empirical study"],"arxivUrl":null,"paperUrl":"https://aclanthology.org/2025.acl-long.1310/","bibtex":"@inproceedings{DBLP:conf/acl/Cao0LMLH000QCT25,\n author = {Jialun Cao and\n Yaojie Lu and\n Meiziniu Li and\n Haoyang Ma and\n Haokun Li and\n Mengda He and\n Cheng Wen and\n Le Sun and\n Hongyu Zhang and\n Shengchao Qin and\n Shing{-}Chi Cheung and\n Cong Tian},\n editor = {Wanxiang Che and\n Joyce Nabende and\n Ekaterina Shutova and\n Mohammad Taher Pilehvar},\n title = {From Informal to Formal - Incorporating and Evaluating LLMs on Natural\n Language Requirements to Verifiable Formal Proofs},\n booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational\n Linguistics (Volume 1: Long Papers), {ACL} 2025, Vienna, Austria,\n July 27 - August 1, 2025},\n pages = {26984--27003},\n publisher = {Association for Computational Linguistics},\n year = {2025},\n url = {https://aclanthology.org/2025.acl-long.1310/},\n timestamp = {Tue, 24 Mar 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/acl/Cao0LMLH000QCT25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"A Tale of Two DL Cities: When Library Tests Meet Compiler.","date":"2025","authors":["Qingchao Shen","Yongqiang Tian","Haoyang Ma","Junjie Chen","Lili Huang","Ruifeng Fu","Shing-Chi Cheung","Zan 
Wang"],"venue":"47th IEEE/ACM International Conference on Software Engineering","venueShort":"ICSE","abstract":"Deep Learning (DL) compilers typically load a DL model and optimize it with intermediate representation. Existing DL compiler testing techniques mainly focus on model optimization stages, but rarely explore bug detection at the model loading stage. Effectively testing the model loading stage requires covering diverse usages of each DL operator from various DL libraries, which shares a common objective with DL library testing, indicating that the embedded knowledge in DL library tests is beneficial for testing the model loading stage of DL compilers. With this idea, we propose Opera to migrate the knowledge embedded in DL library tests to test the model loading stage. Opera constructs diverse tests from various tests for DL libraries (including the tests documented in DL libraries and those generated by recent fuzzers). In total, we considered three sources of tests in DL libraries for migration. In addition, it incorporates a diversity-based test prioritization strategy to migrate and execute those tests that are more likely to detect diverse bugs earlier. We then used eight frontends from three DL compilers (e.g., TVM, TensorRT, and OpenVINO) for evaluation. OPERA detected 170 previously unknown bugs in total, 90 of which have been confirmed/fixed by developers, demonstrating the effectiveness of such the migration-based idea. 
The test prioritization strategy in OPERA improves testing efficiency with migrated tests by $11.9 \\% \\sim 47.4 \\%$ on average compared to general test prioritization strategies.","tags":["DL Compiler","Testing","Metamorphic Testing","Program Analysis"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/ICSE55347.2025.00025","bibtex":"@inproceedings{DBLP:conf/icse/ShenTMCHFCW25,\n author = {Qingchao Shen and\n Yongqiang Tian and\n Haoyang Ma and\n Junjie Chen and\n Lili Huang and\n Ruifeng Fu and\n Shing{-}Chi Cheung and\n Zan Wang},\n title = {A Tale of Two {DL} Cities: When Library Tests Meet Compiler},\n booktitle = {47th {IEEE/ACM} International Conference on Software Engineering,\n {ICSE} 2025, Ottawa, ON, Canada, April 26 - May 6, 2025},\n pages = {2201--2212},\n publisher = {{IEEE}},\n year = {2025},\n url = {https://doi.org/10.1109/ICSE55347.2025.00025},\n doi = {10.1109/ICSE55347.2025.00025},\n timestamp = {Fri, 04 Jul 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/conf/icse/ShenTMCHFCW25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Differential Testing of Concurrent Classes.","date":"2025","authors":["Valerio Terragni","Shing-Chi Cheung"],"venue":"IEEE Conference on Software Testing","venueShort":"ICST","abstract":"Concurrent programs are pervasive, yet difficult to write. The inherent complexity of thread synchronization makes the evolution of concurrent programs prone to concurrency faults. Previous work on regression testing concurrent programs focused on reducing the cost of re-run the existing tests. However, existing tests may not be able to expose the regression faults in the modified program. In this paper, we present Condiff a differential testing technique that generates concurrent tests and oracles to expose behavioral differences between two versions of a given concurrent class. 
Since concurrent programs are non-deterministic, this involves exploring all possible non-deterministic thread interleavings of each generated test on both versions. However, we can afford to analyze only a few concurrent tests due to the high cost of exhaustive interleaving exploration. To address the challenge, Condiff leverages the information of code changes and trace analysis to analyze only those concurrent tests that are likely to expose behavioral differences (if they exist). We evaluated Condiff on a set of Java classes. Our results show that Condiff can effectively generate concurrent tests that expose behavioral differences.","tags":["Concurrency","Differential Testing","Regression Testing","Java"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/ICST62969.2025.10989027","bibtex":"@inproceedings{DBLP:conf/icst/TerragniC25,\n author = {Valerio Terragni and\n Shing{-}Chi Cheung},\n title = {Differential Testing of Concurrent Classes},\n booktitle = {{IEEE} Conference on Software Testing, Verification and Validation,\n {ICST} 2025, Napoli, Italy, March 31 - April 4, 2025},\n pages = {255--266},\n publisher = {{IEEE}},\n year = {2025},\n url = {https://doi.org/10.1109/ICST62969.2025.10989027},\n doi = {10.1109/ICST62969.2025.10989027},\n timestamp = {Fri, 30 May 2025 12:14:04 +0200},\n biburl = {https://dblp.org/rec/conf/icst/TerragniC25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"CodeCleaner: Mitigating Data Contamination for LLM Benchmarking.","date":"2025","authors":["Jialun Cao","Songqiang Chen","Wuqi Zhang","Hau Ching Lo","Yeting Li","Shing-Chi Cheung"],"venue":"the 16th International Conference on Internetware","venueShort":"Internetware","abstract":"Data contamination presents a critical barrier preventing widespread industrial adoption of advanced software engineering techniques that leverage large language models (LLMs). 
This phenomenon occurs when evaluation data inadvertently overlaps with the public code repositories used to train LLMs, severely undermining the credibility of performance evaluations. Code refactoring, which comprises code restructuring and variable renaming, has emerged as a promising measure to mitigate data contamination. However, the lack of automated code refactoring tools and scientifically validated refactoring techniques has hampered widespread industrial implementation. To bridge the gap, this paper presents the first systematic study to examine the efficacy of code refactoring operators at multiple scales (method-level, class-level, and cross-class level) and in different programming languages. We develop CodeCleaner, including 11 operators for Python in multiple scales and 4 for Java. We elaborate on the rationale for why these operators could work to resolve data contamination and use both data-wise (e.g., N-gram matching overlap ratio) and model-wise metrics (e.g., perplexity) to quantify the efficacy after operators are applied. A drop of 75% overlap ratio is found when applying all operators in CodeCleaner, demonstrating their effectiveness in addressing data contamination. Besides, we migrate four operators to Java, showing their generalizability to another language. We also observed an average of 19% decrease in LLMs’ performance after applying our operators. 
We make CodeCleaner online available at https://github.com/ArabelaTso/CodeCleaner-v1 to facilitate further studies on mitigating LLM data contamination.","tags":["LLM","Data Contamination","Code Refactoring","Empirical study"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3755881.3755901","bibtex":"@inproceedings{DBLP:conf/internetware/CaoCZLLC25,\n author = {Jialun Cao and\n Songqiang Chen and\n Wuqi Zhang and\n Hau Ching Lo and\n Yeting Li and\n Shing{-}Chi Cheung},\n editor = {Hong Mei and\n Jian Lv and\n Zhi Jin and\n Xuandong Li and\n Thomas Zimmermann and\n Ge Li and\n Lei Bu and\n Xin Xia},\n title = {CodeCleaner: Mitigating Data Contamination for {LLM} Benchmarking},\n booktitle = {Proceedings of the 16th International Conference on Internetware,\n Internetware 2025, Trondheim, Norway, June 20-22, 2025},\n pages = {71--83},\n publisher = {{ACM}},\n year = {2025},\n url = {https://doi.org/10.1145/3755881.3755901},\n doi = {10.1145/3755881.3755901},\n timestamp = {Thu, 05 Mar 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/internetware/CaoCZLLC25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Cross2OH: Enabling Seamless Porting of C/C++ Software Libraries to OpenHarmony.","date":"2025","authors":["Qian Zhang","Tsz-On Li","Ying Wang","Li Li","Shing-Chi Cheung"],"venue":"40th IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","abstract":"OpenHarmony is a new mobile operating system that offers a popular alternative to Android and iOS. To support its adoption, significant efforts have been devoted to porting C/C++ libraries from Linux to OpenHarmony. However, this porting process presents unique challenges due to the fundamental architectural differences in system libraries, runtime environments, and build systems between the two platforms. 
These discrepancies manifest as Cross-platform Incompatibility (CPI) issues during cross-compilation, which are particularly difficult to resolve for two key reasons. First, conventional cross-compilation toolchains provide only brief error messages that offer inadequate diagnostic information for CPI issues. Second, resolving these issues requires a deep understanding of cross-platform discrepancies, yet comprehensive documentation or systematic guidelines about such Linux-to-OpenHarmony differences remain largely unavailable.In this experience paper, to assist developers in addressing these challenges, we conducted an empirical study on 92 C/C++ libraries successfully ported to OpenHarmony. Through manual step-by-step reproduction of all CPI issues, our study reveals that discrepancies between Linux and OpenHarmony can be divided into three categories, and CPI issues can manifest through eight dimensions. Furthermore, we identified eight common adaptation strategies for resolving CPI issues. Based on these findings, we present Cross2OH, an automated technique for porting Linux-based software to OpenHarmony. Our approach combines: (1) an adaptation knowledge base (derived from RQ1 and RQ2 findings) and (2) a static analysis approach to detect and patch eight types of CPI issues. Evaluation using real developer patches shows Cross2OH achieves 0.94 recall and 0.91 precision in resolving CPI issues. Notably, Cross2OH enables successful cross-compilation for 40 critical libraries (including dependencies for popular Android apps such as WeChat, Microsoft Excel, Bilibili), with 29 of them passed official OpenHarmony review. 
The evaluation results demonstrate Cross2OH’s potential to streamline the porting process and foster the growth of the OpenHarmony software ecosystem.","tags":["Compatibility Issues","Empirical study","Program Analysis","Testing"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/ASE63991.2025.00146","bibtex":"@inproceedings{DBLP:conf/kbse/ZhangLWLC25,\n author = {Qian Zhang and\n Tsz{-}On Li and\n Ying Wang and\n Li Li and\n Shing{-}Chi Cheung},\n title = {Cross2OH: Enabling Seamless Porting of {C/C++} Software Libraries\n to OpenHarmony},\n booktitle = {40th {IEEE/ACM} International Conference on Automated Software Engineering,\n {ASE} 2025, Seoul, Korea, Republic of, November 16-20, 2025},\n pages = {1744--1755},\n publisher = {{IEEE}},\n year = {2025},\n url = {https://doi.org/10.1109/ASE63991.2025.00146},\n doi = {10.1109/ASE63991.2025.00146},\n timestamp = {Tue, 10 Feb 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/kbse/ZhangLWLC25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Demystifying Cross-Language C/C++ Binaries: A Robust Software Component Analysis Approach.","date":"2025","authors":["Meiqiu Xu","Ying Wang","Wei Tang","Xian Zhan","Shing-Chi Cheung","Hai Yu","Zhiliang Zhu"],"venue":"40th IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","abstract":"Binary Software Composition Analysis (BSCA) is a technique for identifying the versions of third-party libraries (TPLs) used in compiled binaries, thereby tracing the dependencies and vulnerabilities of software components without access to their source code. 
However, existing BSCA techniques struggle with cross-language invoked C/C++ binaries in polyglot projects due to two key challenges: (1) interference from heterogeneous Foreign Function Interface (FFI) bindings that obscure distinctive TPL features and generate false positives during matching processes, and (2) the inherent complexity of composite binaries (fused binaries), particularly prevalent in polyglot development where multiple TPLs are frequently compiled into single executable units, resulting in blurred boundaries between libraries and substantially compromising version identification precision.We propose DeeperBin, a BSCA technique that addresses these challenges through a high-quality, large-scale feature database with four key advantages: (1) high scalability that is capable of analyzing 74,647 C/C++ TPL versions, (2) efficient noise filtering to remove FFI bindings and common functions, (3) automated extraction of version string regexes for 31,855 TPL versions, and (4) generation of distinctive version features using the Minimum Description Length (MDL) principle. Evaluated on 418 cross-language binaries, DeeperBin achieves 81.2% precision and 84.6% recall for TPL detection, outperforming state-of-the-art (SOTA) techniques by 14.1% and 23.2%, respectively. For version identification, it achieves 70.3% precision, a 12.6% improvement over state-of-the-art techniques. Ablation studies confirm the usefulness of FFI filtering and MDL-based features, boosting precision and recall by 17.1% and 18.8%. 
DeeperBin also maintains competitive efficiency, processing binaries in 364.3 seconds while supporting the largest feature database.","tags":["Third-Party Libraries","Binary Software Composition Analysis","Program Analysis","Fault Detection"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/ASE63991.2025.00148","bibtex":"@inproceedings{DBLP:conf/kbse/XuWTZCYZ25,\n author = {Meiqiu Xu and\n Ying Wang and\n Wei Tang and\n Xian Zhan and\n Shing{-}Chi Cheung and\n Hai Yu and\n Zhiliang Zhu},\n title = {Demystifying Cross-Language {C/C++} Binaries: {A} Robust Software\n Component Analysis Approach},\n booktitle = {40th {IEEE/ACM} International Conference on Automated Software Engineering,\n {ASE} 2025, Seoul, Korea, Republic of, November 16-20, 2025},\n pages = {1768--1780},\n publisher = {{IEEE}},\n year = {2025},\n url = {https://doi.org/10.1109/ASE63991.2025.00148},\n doi = {10.1109/ASE63991.2025.00148},\n timestamp = {Mon, 09 Feb 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/kbse/XuWTZCYZ25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"LspFuzz: Hunting Bugs in Language Servers.","date":"2025","authors":["Hengcheng Zhu","Songqiang Chen","Valerio Terragni","Lili Wei","Yepang Liu","Jiarong Wu","Shing-Chi Cheung"],"venue":"40th IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","abstract":"The Language Server Protocol (LSP) has revolutionized the integration of code intelligence in modern software development. There are approximately 300 LSP server implementations for various languages and 50 editors offering LSP integration. However, the reliability of LSP servers is a growing concern, as crashes can disable all code intelligence features and significantly impact productivity, while vulnerabilities can put developers at risk even when editing untrusted source code. 
Despite the widespread adoption of LSP, no existing techniques specifically target LSP server testing. To bridge this gap, we present LspFuzz, a grey-box hybrid fuzzer for systematic LSP server testing. Our key insight is that effective LSP server testing requires holistic mutation of source code and editor operations, as bugs often manifest from their combinations. To satisfy the sophisticated constraints of LSP and effectively explore the input space, we employ a two-stage mutation pipeline: syntax-aware mutations to source code, followed by context-aware dispatching of editor operations. We evaluated LspFuzz on four widely used LSP servers. LspFuzz demonstrated superior performance compared to baseline fuzzers, and uncovered previously unknown bugs in real-world LSP servers. Of the 51 bugs we reported, 42 have been confirmed, 26 have been fixed by developers, and two have been assigned CVE numbers. Our work advances the quality assurance of LSP servers, providing both a practical tool and foundational insights for future research in this domain.","tags":["Testing","Program Analysis","Security","Fault Detection"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/ASE63991.2025.00183","bibtex":"@inproceedings{DBLP:conf/kbse/ZhuCTWLWC25,\n author = {Hengcheng Zhu and\n Songqiang Chen and\n Valerio Terragni and\n Lili Wei and\n Yepang Liu and\n Jiarong Wu and\n Shing{-}Chi Cheung},\n title = {LspFuzz: Hunting Bugs in Language Servers},\n booktitle = {40th {IEEE/ACM} International Conference on Automated Software Engineering,\n {ASE} 2025, Seoul, Korea, Republic of, November 16-20, 2025},\n pages = {2209--2221},\n publisher = {{IEEE}},\n year = {2025},\n url = {https://doi.org/10.1109/ASE63991.2025.00183},\n doi = {10.1109/ASE63991.2025.00183},\n timestamp = {Sun, 08 Feb 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/kbse/ZhuCTWLWC25.bib},\n bibsource = {dblp computer science bibliography, 
https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Word Closure-Based Metamorphic Testing for Machine Translation.","date":"2024","authors":["Xiaoyuan Xie","Shuo Jin","Songqiang Chen","Shing-Chi Cheung"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","abstract":"With the wide application of machine translation, the testing of Machine Translation Systems (MTSs) has attracted much attention. Recent works apply Metamorphic Testing (MT) to address the oracle problem in MTS testing. Existing MT methods for MTS generally follow the workflow of input transformation and output relation comparison, which generates a follow-up input sentence by mutating the source input and compares the source and follow-up output translations to detect translation errors, respectively. These methods use various input transformations to generate the test case pairs and have successfully triggered numerous translation errors. However, they have limitations in performing fine-grained and rigorous output relation comparison and thus may report many false alarms and miss many true errors. In this article, we propose a word closure-based output comparison method to address the limitations of the existing MTS MT methods. We first propose word closure as a new comparison unit, where each closure includes a group of correlated input and output words in the test case pair. Word closures suggest the linkages between the appropriate fragment in the source output translation and its counterpart in the follow-up output for comparison. Next, we compare the semantics on the level of word closure to identify the translation errors. In this way, we perform a fine-grained and rigorous semantic comparison for the outputs and thus realize more effective violation identification. We evaluate our method with the test cases generated by five existing input transformations and the translation outputs from three popular MTSs. 
Results show that our method significantly outperforms the existing works in violation identification by improving the precision and recall and achieving an average increase of 29.9% in F1 score. It also helps to increase the F1 score of translation error localization by 35.9%.","tags":["Metamorphic Testing","Machine Translation","Testing","Semantic Comparison"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3675396","bibtex":"@article{DBLP:journals/tosem/XieJCC24,\n author = {Xiaoyuan Xie and\n Shuo Jin and\n Songqiang Chen and\n Shing{-}Chi Cheung},\n title = {Word Closure-Based Metamorphic Testing for Machine Translation},\n journal = {{ACM} Trans. Softw. Eng. Methodol.},\n volume = {33},\n number = {8},\n pages = {203:1--203:46},\n year = {2024},\n url = {https://doi.org/10.1145/3675396},\n doi = {10.1145/3675396},\n timestamp = {Sun, 02 Nov 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/journals/tosem/XieJCC24.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Shortening Overlong Method Names with Abbreviations.","date":"2024","authors":["Yanjie Jiang","Hui Liu","Shing-Chi Cheung","Lu Zhang"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","abstract":"Methods should be named to summarize their responsibilities meaningfully. When a method has a non-trivial responsibility, it may require a naming using multiple words. However, overlong method names are susceptible to typos and reduced readability (e.g., displaying a statement partially in standard screen width or splitting it into multiple lines). Programming naming conventions commonly adopt a maximal length (in characters) for identifiers. In practice, developers may not necessarily find a meaningful name that follows such naming conventions when coding a non-trivial method. 
This article presents the first automated technique (called NameCompressor ) to shorten overlong method names. Our inspiration is that many lengthy words/phrases in an overlong method name have known and unambiguous abbreviations. The use of these abbreviations for method names is common. To shorten an overlong method name, NameCompressor employs three compression techniques, i.e., context-aware compression, probability-based compression, and machine learning-based compression, to find appropriate abbreviations for the words/phrases in the method name. We evaluate NameCompressor on a dataset of 700 overlong method names. It correctly generates 613 short names identical to those specified by the developers of these methods.","tags":["Program Synthesis","Unit Test","Software Analytics","Empirical study"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3676959","bibtex":"@article{DBLP:journals/tosem/JiangLCZ24,\n author = {Yanjie Jiang and\n Hui Liu and\n Shing{-}Chi Cheung and\n Lu Zhang},\n title = {Shortening Overlong Method Names with Abbreviations},\n journal = {{ACM} Trans. Softw. Eng. 
Methodol.},\n volume = {33},\n number = {8},\n pages = {205:1--205:24},\n year = {2024},\n url = {https://doi.org/10.1145/3676959},\n doi = {10.1145/3676959},\n timestamp = {Sat, 25 Jan 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/journals/tosem/JiangLCZ24.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Enchanting Program Specification Synthesis by Large Language Models Using Static Analysis and Program Verification.","date":"2024","authors":["Cheng Wen","Jialun Cao","Jie Su","Zhiwu Xu","Shengchao Qin","Mengda He","Haokun Li","Shing-Chi Cheung","Cong Tian"],"venue":"Computer Aided Verification - 36th International Conference","venueShort":"CAV","abstract":"Formal verification provides a rigorous and systematic approach to ensure the correctness and reliability of software systems. Yet, constructing specifications for the full proof relies on domain expertise and non-trivial manpower. In view of such needs, an automated approach for specification synthesis is desired. While existing automated approaches are limited in their versatility, i.e., they either focus only on synthesizing loop invariants for numerical programs, or are tailored for specific types of programs or invariants. Programs involving multiple complicated data types (e.g., arrays, pointers) and code structures (e.g., nested loops, function calls) are often beyond their capabilities. To help bridge this gap, we present AutoSpec, an automated approach to synthesize specifications for automated program verification. It overcomes the shortcomings of existing work in specification versatility, synthesizing satisfiable and adequate specifications for full proof. It is driven by static analysis and program verification, and is empowered by large language models (LLMs). 
AutoSpec addresses the practical challenges in three ways: (1) driving AutoSpec by static analysis and program verification, LLMs serve as generators to generate candidate specifications, (2) programs are decomposed to direct the attention of LLMs, and (3) candidate specifications are validated in each round to avoid error accumulation during the interaction with LLMs. In this way, AutoSpec can incrementally and iteratively generate satisfiable and adequate specifications. The evaluation shows its effectiveness and usefulness, as it outperforms existing works by successfully verifying 79% of programs through automatic specification synthesis, a significant improvement of 1.592x. It can also be successfully applied to verify the programs in a real-world X509-parser project.","tags":["Formal Software Verification","Program Synthesis","LLM","Specification Synthesis"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1007/978-3-031-65630-9_16","bibtex":"@inproceedings{DBLP:conf/cav/WenCSXQHLCT24,\n author = {Cheng Wen and\n Jialun Cao and\n Jie Su and\n Zhiwu Xu and\n Shengchao Qin and\n Mengda He and\n Haokun Li and\n Shing{-}Chi Cheung and\n Cong Tian},\n editor = {Arie Gurfinkel and\n Vijay Ganesh},\n title = {Enchanting Program Specification Synthesis by Large Language Models\n Using Static Analysis and Program Verification},\n booktitle = {Computer Aided Verification - 36th International Conference, {CAV}\n 2024, Montreal, QC, Canada, July 24-27, 2024, Proceedings, Part {II}},\n series = {Lecture Notes in Computer Science},\n pages = {302--328},\n publisher = {Springer},\n year = {2024},\n url = {https://doi.org/10.1007/978-3-031-65630-9\\_16},\n doi = {10.1007/978-3-031-65630-9\\_16},\n timestamp = {Mon, 23 Mar 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/cav/WenCSXQHLCT24.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Towards Understanding the Bugs in Solidity 
Compiler.","date":"2024","authors":["Haoyang Ma","Wuqi Zhang","Qingchao Shen","Yongqiang Tian","Junjie Chen","Shing-Chi Cheung"],"venue":"the 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis","venueShort":"ISSTA","abstract":"Solidity compiler plays a key role in enabling the development of smart contract applications on Ethereum by governing the syntax of a domain-specific language called Solidity and performing compilation and optimization of Solidity code. The correctness of Solidity compiler is critical in fostering transparency, efficiency, and trust in industries reliant on smart contracts. However, like other software systems, Solidity compiler is prone to bugs, which may produce incorrect bytecodes on blockchain platforms, resulting in severe security concerns. As a domain-specific compiler for smart contracts, Solidity compiler differs from other compilers in many perspectives, posing unique challenges to detect its bugs. To understand the bugs in Solidity compiler and benefit future research, in this paper, we present the first systematic study on 533 Solidity compiler bugs. We carefully examined their characteristics (including symptoms, root causes, and distribution), and their triggering test cases. Our study leads to seven bug-revealing takeaways for Solidity compiler. Moreover, to study the limitations of Solidity compiler fuzzers and bring our findings into practical scenarios, we evaluate three Solidity compiler fuzzers on our constructed benchmark. The results show that these fuzzers are inefficient in detecting Solidity compiler bugs. 
The inefficiency arises from their failure to consider the interesting bug-inducing features, bug-related compilation flags, and test oracles.","tags":["Smart Contracts","Compiler testing","Empirical study","Fault Detection"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3650212.3680362","bibtex":"@inproceedings{DBLP:conf/issta/MaZS00C24,\n author = {Haoyang Ma and\n Wuqi Zhang and\n Qingchao Shen and\n Yongqiang Tian and\n Junjie Chen and\n Shing{-}Chi Cheung},\n editor = {Maria Christakis and\n Michael Pradel},\n title = {Towards Understanding the Bugs in Solidity Compiler},\n booktitle = {Proceedings of the 33rd {ACM} {SIGSOFT} International Symposium on\n Software Testing and Analysis, {ISSTA} 2024, Vienna, Austria, September\n 16-20, 2024},\n pages = {1312--1324},\n publisher = {{ACM}},\n year = {2024},\n url = {https://doi.org/10.1145/3650212.3680362},\n doi = {10.1145/3650212.3680362},\n timestamp = {Sun, 19 Jan 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/issta/MaZS00C24.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"JavaBench: A Benchmark of Object-Oriented Code Generation for Evaluating Large Language Models.","date":"2024","authors":["Jialun Cao","Zhiyong Chen","Jiarong Wu","Shing-Chi Cheung","Chang Xu"],"venue":"the 39th IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","abstract":"Code generation benchmarks such as HumanEval are widely adopted to evaluate LLMs' capabilities. However, after consolidating the latest 24 benchmarks, we noticed three significant imbalances. First, imbalanced programming language. 95.8% of benchmarks involve Python, while only 5 benchmarks involve Java, resulting in an insufficient understanding of LLMs' capability to generate Java code. Second, imbalanced code granularity. Function-/statement-level benchmarks account for over 83.3% of benchmarks. 
Only a mere handful extends to class-/project-levels, and all are limited to Python. Third, lacking advanced features. Existing benchmarks primarily assess basic coding skills (e.g., variables, operators, and control structures), while overlooking advanced Object-Oriented Programming (OOP) features (i.e., encapsulation, inheritance, and polymorphism). Considering the prevalence of these advanced features in real-world Java project development, constructing benchmarks to test LLMs on handling OOP features is necessary.","tags":["LLM","Benchmark","Java","Object-Oriented Programming"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3691620.3695470","bibtex":"@inproceedings{DBLP:conf/kbse/CaoCWC024,\n author = {Jialun Cao and\n Zhiyong Chen and\n Jiarong Wu and\n Shing{-}Chi Cheung and\n Chang Xu},\n editor = {Vladimir Filkov and\n Baishakhi Ray and\n Minghui Zhou},\n title = {JavaBench: {A} Benchmark of Object-Oriented Code Generation for Evaluating\n Large Language Models},\n booktitle = {Proceedings of the 39th {IEEE/ACM} International Conference on Automated\n Software Engineering, {ASE} 2024, Sacramento, CA, USA, October 27\n - November 1, 2024},\n pages = {870--882},\n publisher = {{ACM}},\n year = {2024},\n url = {https://doi.org/10.1145/3691620.3695470},\n doi = {10.1145/3691620.3695470},\n timestamp = {Mon, 03 Mar 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/kbse/CaoCWC024.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Neural-FEBI: Accurate function identification in Ethereum Virtual Machine bytecode.","date":"2023","authors":["Jiahao He","Shuangyin Li","Xinming Wang","Shing-Chi Cheung","Gansen Zhao","Jinji Yang"],"venue":"Journal of Systems and Software","venueShort":"JSS","abstract":"Millions of smart contracts have been deployed onto the Ethereum platform, posing potential attack subjects. 
Therefore, analyzing contract binaries is vital since their sources are unavailable, involving identification comprising function entry identification and detecting its boundaries. Such boundaries are critical to many smart contract applications, e.g. reverse engineering and profiling. Unfortunately, it is challenging to identify functions from these stripped contract binaries due to the lack of internal function call statements and the compiler-inducing instruction reshuffling. Recently, several existing works excessively relied on a set of handcrafted heuristic rules which impose several faults. To address this issue, we propose a novel neural network-based framework for EVM bytecode Function Entries and Boundaries Identification (neural-FEBI) that does not rely on a fixed set of handcrafted rules. Instead, it used a two-level bi-Long Short-Term Memory network and a Conditional Random Field network to locate the function entries. The suggested framework also devises a control flow traversal algorithm to determine the code segments reachable from the function entry as its boundary. Several experiments on 38,996 publicly available smart contracts collected as binary demonstrate that neural-FEBI confirms the lowest and highest F1-scores for the function entries identification task across different datasets of 88.3 to 99.7, respectively. Its performance on the function boundary identification task is also increased from 79.4% to 97.1% compared with state-of-the-art. We further demonstrate that the identified function information can be used to construct more accurate intra-procedural CFGs and call graphs. 
The experimental results confirm that the proposed framework significantly outperforms state-of-the-art, often based on handcrafted heuristic rules.","tags":["Smart Contracts","Program Analysis","Neural Networks","Blockchain"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1016/j.jss.2023.111627","bibtex":"@article{DBLP:journals/jss/HeLWCZY23,\n author = {Jiahao He and\n Shuangyin Li and\n Xinming Wang and\n Shing{-}Chi Cheung and\n Gansen Zhao and\n Jinji Yang},\n title = {Neural-FEBI: Accurate function identification in Ethereum Virtual\n Machine bytecode},\n journal = {J. Syst. Softw.},\n volume = {199},\n pages = {111627},\n year = {2023},\n url = {https://doi.org/10.1016/j.jss.2023.111627},\n doi = {10.1016/J.JSS.2023.111627},\n timestamp = {Sat, 13 May 2023 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/jss/HeLWCZY23.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"COMET: Coverage-guided Model Generation For Deep Learning Library Testing.","date":"2023","authors":["Meiziniu Li","Jialun Cao","Yongqiang Tian","Tsz On Li","Ming Wen","Shing-Chi Cheung"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","abstract":"Recent deep learning (DL) applications are mostly built on top of DL libraries. The quality assurance of these libraries is critical to the dependable deployment of DL applications. Techniques have been proposed to generate various DL models and apply them to test these libraries. However, their test effectiveness is constrained by the diversity of layer API calls in their generated DL models. Our study reveals that these techniques can cover at most 34.1% layer inputs, 25.9% layer parameter values, and 15.6% layer sequences. As a result, we find that many bugs arising from specific layer API calls (i.e., specific layer inputs, parameter values, or layer sequences) can be missed by existing techniques. 
Because of this limitation, we propose COMET to effectively generate DL models with diverse layer API calls for DL library testing. COMET: (1) designs a set of mutation operators and a coverage-based search algorithm to diversify layer inputs, layer parameter values, and layer sequences in DL models. (2) proposes a model synthesis method to boost the test efficiency without compromising the layer API call diversity. Our evaluation result shows that COMET outperforms baselines by covering twice as many layer inputs (69.7% vs. 34.1%), layer parameter values (50.2% vs. 25.9%), and layer sequences (39.0% vs. 15.6%) as those by the state-of-the-art. Moreover, COMET covers 3.4% more library branches than those by existing techniques. Finally, COMET detects 32 new bugs in the latest version of eight popular DL libraries, including TensorFlow and MXNet, with 21 of them confirmed by DL library developers and seven of those confirmed bugs have been fixed by developers.","tags":["Deep Learning compiler testing","Testing","DL Compiler","Empirical study"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3583566","bibtex":"@article{DBLP:journals/tosem/LiCTLWC23,\n author = {Meiziniu Li and\n Jialun Cao and\n Yongqiang Tian and\n Tsz On Li and\n Ming Wen and\n Shing{-}Chi Cheung},\n title = {{COMET:} Coverage-guided Model Generation For Deep Learning Library\n Testing},\n journal = {{ACM} Trans. Softw. Eng. 
Methodol.},\n volume = {32},\n number = {5},\n pages = {127:1--127:34},\n year = {2023},\n url = {https://doi.org/10.1145/3583566},\n doi = {10.1145/3583566},\n timestamp = {Thu, 31 Aug 2023 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/tosem/LiCTLWC23.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Runtime Permission Issues in Android Apps: Taxonomy, Practices, and Ways Forward.","date":"2021-01-01","authors":["Ying Wang","Yibo Wang","Sinan Wang","Yepang Liu","Chang Xu","Shing-Chi Cheung","Hai Yu","Zhiliang Zhu"],"venue":"IEEE Transactions on Software Engineering","venueShort":"TSE","abstract":"Android introduces a new permission model that allows apps to request permissions at runtime rather than at the installation time since 6.0 (Marshmallow, API level 23). While this runtime permission model provides users with greater flexibility in controlling an app's access to sensitive data and system features, it brings new challenges to app development. First, as users may grant or revoke permissions at any time while they are using an app, developers need to ensure that the app properly checks and requests required permissions before invoking any permission-protected APIs. Second, Android's permission mechanism keeps evolving and getting customized by device manufacturers. Developers are expected to comprehensively test their apps on different Android versions and device models to make sure permissions are properly requested in all situations. Unfortunately, these requirements are often impractical for developers. In practice, many Android apps suffer from various runtime permission issues (ARP issues). While existing studies have explored ARP issues, the understanding of such issues is still preliminary. 
To better characterize ARP issues, we performed an empirical study using 135 Stack Overflow posts that discuss ARP issues and 199 real ARP issues archived in popular open-source Android projects on GitHub. Via analyzing the data, we observed 11 types of ARP issues that commonly occur in Android apps. For each type of issues, we systematically studied: (1) how they can be manifested, (2) how pervasive and serious they are in real-world apps, and (3) how they can be fixed. We also analyzed the evolution trend of different types of issues from 2015 to 2020 to understand their impact on the Android ecosystem. Furthermore, we conducted a field survey and in-depth interviews among the practitioners from open-source community and industry, to gain insights from practitioners’ practices and learn their requirements of tools that can help combat ARP issues. Finally, to understand the strengths and weaknesses of the existing tools that can detect ARP issues, we built ARPBench , an open benchmark consisting of 94 real ARP issues, and evaluated the performance of three available tools. The experimental results indicate that the existing tools have very limited supports for detecting our observed issue types and report a large number of false alarms. We further analyzed the tools’ limitations and summarized the challenges of designing an effective ARP issue detection technique. We hope that our findings can shed light on future research and provide useful guidance to practitioners.","tags":["Android","Empirical study","Security","Testing"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/TSE.2022.3148258","bibtex":"@article{DBLP:journals/tse/WangWWLXCYZ23,\n author = {Ying Wang and\n Yibo Wang and\n Sinan Wang and\n Yepang Liu and\n Chang Xu and\n Shing{-}Chi Cheung and\n Hai Yu and\n Zhiliang Zhu},\n title = {Runtime Permission Issues in Android Apps: Taxonomy, Practices, and\n Ways Forward},\n journal = {{IEEE} Trans. 
Software Eng.},\n volume = {49},\n number = {1},\n pages = {185--210},\n year = {2023},\n url = {https://doi.org/10.1109/TSE.2022.3148258},\n doi = {10.1109/TSE.2022.3148258},\n timestamp = {Thu, 21 Nov 2024 00:00:00 +0100},\n biburl = {https://dblp.org/rec/journals/tse/WangWWLXCYZ23.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Plumber: Boosting the Propagation of Vulnerability Fixes in the npm Ecosystem.","date":"2023","authors":["Ying Wang","Peng Sun","Lin Pei","Yue Yu","Chang Xu","Shing-Chi Cheung","Hai Yu","Zhiliang Zhu"],"venue":"IEEE Transactions on Software Engineering","venueShort":"TSE","abstract":"Vulnerabilities are known reported security threats that affect a large amount of packages in the npm ecosystem. To mitigate these security threats, the open-source community strongly suggests vulnerable packages to timely publish vulnerability fixes and recommends affected packages to update their dependencies. However, there are still serious lags in the propagation of vulnerability fixes in the ecosystem. In our preliminary study on the latest versions of 356,283 active npm packages, we found that 20.0% of them can still introduce vulnerabilities via direct or transitive dependencies although the involved vulnerable packages have already published fix versions for over a year. Prior study by (Chinthanet et al. 2021) lays the groundwork for research on how to mitigate propagation lags of vulnerability fixes in an ecosystem. They conducted an empirical investigation to identify lags that might occur between the vulnerable package release and its fixing release. They found that factors such as the branch upon which a fix landed and the severity of the vulnerability had a small effect on its propagation trajectory throughout the ecosystem. 
To ensure quick adoption and propagation of a release that contains the fix, they gave several actionable advice to developers and researchers. However, it is still an open question how to design an effective technique to accelerate the propagation of vulnerability fixes. Motivated by this problem, in this paper, we conducted an empirical study to learn the scale of packages that block the propagation of vulnerability fixes in the ecosystem and investigate their evolution characteristics. Furthermore, we distilled the remediation strategies that have better effects on mitigating the fix propagation lags. Leveraging our empirical findings, we propose an ecosystem-level technique, Plumber , for deriving feasible remediation strategies to boost the propagation of vulnerability fixes. To precisely diagnose the causes of fix propagation blocking, Plumber models the vulnerability metadata, and npm dependency metadata and continuously monitors their evolution. By analyzing a full-picture of the ecosystem-level dependency graph and the corresponding fix propagation statuses, it derives remediation schemes for pivotal packages. In the schemes, Plumber provides customized remediation suggestions with vulnerability impact analysis to arouse package developers’ awareness. We applied Plumber to generating 268 remediation reports for the identified pivotal packages, to evaluate its remediation effectiveness based on developers’ feedback. Encouragingly, 47.4% our remediation reports received positive feedback from many well-known npm projects, such as Tensorflow/tfjs , Ethers.js , and GoogleChrome/workbox . Our reports have boosted the propagation of vulnerability fixes into 16,403 root packages through 92,469 dependency paths. 
On average, each remediated package version is receiving 72,678 downloads per week by the time of this work.","tags":["Security","Empirical study","Dependency Management","Vulnerability"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/TSE.2023.3243262","bibtex":"@article{DBLP:journals/tse/0038SP00C0023,\n author = {Ying Wang and\n Peng Sun and\n Lin Pei and\n Yue Yu and\n Chang Xu and\n Shing{-}Chi Cheung and\n Hai Yu and\n Zhiliang Zhu},\n title = {Plumber: Boosting the Propagation of Vulnerability Fixes in the npm\n Ecosystem},\n journal = {{IEEE} Trans. Software Eng.},\n volume = {49},\n number = {5},\n pages = {3155--3181},\n year = {2023},\n url = {https://doi.org/10.1109/TSE.2023.3243262},\n doi = {10.1109/TSE.2023.3243262},\n timestamp = {Mon, 28 Aug 2023 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/tse/0038SP00C0023.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"ConfFix: Repairing Configuration Compatibility Issues in Android Apps.","date":"2023","authors":["Huaxun Huang","Chi Xu","Ming Wen","Yepang Liu","Shing-Chi Cheung"],"venue":"the 32nd ACM SIGSOFT International Symposium on Software Testing and Analysis","venueShort":"ISSTA","abstract":"XML configuration files are widely-used to specify the user interfaces (UI) of Android apps. Configuration compatibility (CC) issues are induced owing to the inconsistent handling of such XML configuration files across different Android framework versions. CC issues can cause software crashes and inconsistent look-and-feels, severely impacting the user experience of Android apps. However, there is no universal solution to resolve CC issues and app developers need to handle CC issues case by case. Existing tools are designed based on predefined rules or visual features that are possibly manifested by CC issues. 
Unfortunately, they can fail or generate overfitting patches when the CC issues are beyond their capabilities. To fill the above research gaps, we first empirically studied the app developers' common strategies in patching real-world CC issues. Based on the findings, we propose ConfFix, an automatic approach to repair CC issues in Android apps. ConfFix is driven by the knowledge of how an XML element is handled inconsistently in different versions of the Android framework and generates patches to eliminate such inconsistencies. We evaluated ConfFix on a set of 77 reproducible CC issues in 13 open-source Android apps. The results show that ConfFix outperforms baselines in successfully repairing 64 CC issues with a high precision. Encouragingly, the patches for 38 CC issues have been confirmed and merged by app developers.","tags":["Android","XML Configurations","Compatibility Issues","Program Repair"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3597926.3598074","bibtex":"@inproceedings{DBLP:conf/issta/HuangXW0C23,\n author = {Huaxun Huang and\n Chi Xu and\n Ming Wen and\n Yepang Liu and\n Shing{-}Chi Cheung},\n editor = {Ren{\\'{e}} Just and\n Gordon Fraser},\n title = {ConfFix: Repairing Configuration Compatibility Issues in Android Apps},\n booktitle = {Proceedings of the 32nd {ACM} {SIGSOFT} International Symposium on\n Software Testing and Analysis, {ISSTA} 2023, Seattle, WA, USA, July\n 17-21, 2023},\n pages = {514--525},\n publisher = {{ACM}},\n year = {2023},\n url = {https://doi.org/10.1145/3597926.3598074},\n doi = {10.1145/3597926.3598074},\n timestamp = {Sun, 19 Jan 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/issta/HuangXW0C23.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"ωTest: WebView-Oriented Testing for Android Applications.","date":"2023","authors":["Jiajun Hu","Lili Wei","Yepang Liu","Shing-Chi Cheung"],"venue":"the 32nd ACM SIGSOFT 
International Symposium on Software Testing and Analysis","venueShort":"ISSTA","abstract":"WebView is a UI widget that helps integrate web applications into the native context of Android apps. It provides powerful mechanisms for bi-directional interactions between the native-end (Java) and the web-end (JavaScript) of an Android app. However, these interaction mechanisms are complicated and have induced various types of bugs. To mitigate the problem, various techniques have been proposed to detect WebView-induced bugs via dynamic analysis, which heavily relies on executing tests to explore WebView behaviors. Unfortunately, these techniques either require manual effort or adopt random test generation approaches, which are not able to effectively explore diverse WebView behaviors. In this paper, we study the problem of test generation for WebViews in Android apps. Effective test generation for WebViews requires identifying the essential program properties to be covered by the generated tests. To this end, we propose WebView-specific properties to characterize WebView behaviors, and devise a cross-language dynamic analysis method to identify these properties. We develop ωTest, a test generation technique that searches for event sequences covering the identified WebView-specific properties. An evaluation on 74 real-world open-/closed-source Android apps shows that ωTest can cover diverse WebView behaviors and detect WebView-induced bugs effectively. ωTest detected 36 previously-unknown bugs. 
From the 22 bugs that we have reported to the app developers, 13 bugs were confirmed, 9 of which were fixed.","tags":["Android","Testing","WebView","Program Analysis"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3597926.3598112","bibtex":"@inproceedings{DBLP:conf/issta/HuW0C23,\n author = {Jiajun Hu and\n Lili Wei and\n Yepang Liu and\n Shing{-}Chi Cheung},\n editor = {Ren{\\'{e}} Just and\n Gordon Fraser},\n title = {{\\(\\omega\\)}Test: WebView-Oriented Testing for Android Applications},\n booktitle = {Proceedings of the 32nd {ACM} {SIGSOFT} International Symposium on\n Software Testing and Analysis, {ISSTA} 2023, Seattle, WA, USA, July\n 17-21, 2023},\n pages = {992--1004},\n publisher = {{ACM}},\n year = {2023},\n url = {https://doi.org/10.1145/3597926.3598112},\n doi = {10.1145/3597926.3598112},\n timestamp = {Fri, 16 May 2025 13:09:31 +0200},\n biburl = {https://dblp.org/rec/conf/issta/HuW0C23.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Nuances are the Key: Unlocking ChatGPT to Find Failure-Inducing Tests with Differential Prompting.","date":"2023","authors":["Tsz On Li","Wenxi Zong","Yibo Wang","Haoye Tian","Ying Wang","Shing-Chi Cheung","Jeff Kramer"],"venue":"38th IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","abstract":"Automated detection of software failures is an important but challenging software engineering task. It involves finding in a vast search space the failure-inducing test cases that contain an input triggering the software fault and an oracle asserting the incorrect execution. We are motivated to study how far this outstanding challenge can be solved by recent advances in large language models (LLMs) such as ChatGPT. However, our study reveals that ChatGPT has a relatively low success rate (28.8%) in finding correct failure-inducing test cases for buggy programs. 
A possible conjecture is that finding failure-inducing test cases requires analyzing the subtle differences (nuances) between the tokens of a program's correct version and those for its buggy version. When these two versions have similar sets of tokens and attentions, ChatGPT is weak in distinguishing their differences. We find that ChatGPT can successfully generate failure-inducing test cases when it is guided to focus on the nuances. Our solution is inspired by an interesting observation that ChatGPT could infer the intended functionality of buggy code if it is similar to the correct version. Driven by the inspiration, we develop a novel technique, called Differential Prompting, to effectively find failure-inducing test cases with the help of the compilable code synthesized by the inferred intention. Prompts are constructed based on the nuances between the given version and the synthesized code. We evaluate Differential Prompting on Quixbugs (a popular benchmark of buggy programs) and recent programs published at Codeforces (a popular programming contest portal, which is also an official benchmark of ChatGPT). We compare Differential Prompting with two baselines constructed using conventional ChatGPT prompting and Pynguin (the state-of-the-art unit test generation tool for Python programs). Our evaluation results show that for programs of Quixbugs, Differential Prompting can achieve a success rate of 75.0% in finding failure-inducing test cases, outperforming the best baseline by 2.6X. 
For programs of Codeforces, Differential Prompting's success rate is 66.7%, outperforming the best baseline by 4.0X.","tags":["LLM","Testing","Bug Detection","Program Synthesis"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/ASE56229.2023.00089","bibtex":"@inproceedings{DBLP:conf/kbse/LiZWTWCK23,\n author = {Tsz On Li and\n Wenxi Zong and\n Yibo Wang and\n Haoye Tian and\n Ying Wang and\n Shing{-}Chi Cheung and\n Jeff Kramer},\n title = {Nuances are the Key: Unlocking ChatGPT to Find Failure-Inducing Tests\n with Differential Prompting},\n booktitle = {38th {IEEE/ACM} International Conference on Automated Software Engineering,\n {ASE} 2023, Luxembourg, September 11-15, 2023},\n pages = {14--26},\n publisher = {{IEEE}},\n year = {2023},\n url = {https://doi.org/10.1109/ASE56229.2023.00089},\n doi = {10.1109/ASE56229.2023.00089},\n timestamp = {Sun, 19 Jan 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/kbse/LiZWTWCK23.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Testing Coreference Resolution Systems without Labeled Test Sets.","date":"2023","authors":["Jialun Cao","Yaojie Lu","Ming Wen","Shing-Chi Cheung"],"venue":"the 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering","venueShort":"ESEC/FSE","abstract":"Coreference resolution (CR) is a task to resolve different expressions (e.g., named entities, pronouns) that refer to the same real-world en- tity/event. It is a core natural language processing (NLP) component that underlies and empowers major downstream NLP applications such as machine translation, chatbots, and question-answering. De- spite its broad impact, the problem of testing CR systems has rarely been studied. A major difficulty is the shortage of a labeled dataset for testing. 
While it is possible to feed arbitrary sentences as test inputs to a CR system, a test oracle that captures their expected test outputs (coreference relations) is hard to define automatically. To address the challenge, we propose Crest, an automated testing methodology for CR systems. Crest uses constituency and depen- dency relations to construct pairs of test inputs subject to the same coreference. These relations can be leveraged to define the meta- morphic relation for metamorphic testing. We compare Crest with five state-of-the-art test generation baselines on two popular CR systems, and apply them to generate tests from 1,000 sentences randomly sampled from CoNLL-2012, a popular dataset for corefer- ence resolution. Experimental results show that Crest outperforms baselines significantly. The issues reported by Crest are all true positives (i.e., 100% precision), compared with 63% to 75% achieved by the baselines.","tags":["Metamorphic Testing","Coreference Resolution","NLP","Test Generation"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3611643.3616258","bibtex":"@inproceedings{DBLP:conf/sigsoft/CaoL0C23,\n author = {Jialun Cao and\n Yaojie Lu and\n Ming Wen and\n Shing{-}Chi Cheung},\n editor = {Satish Chandra and\n Kelly Blincoe and\n Paolo Tonella},\n title = {Testing Coreference Resolution Systems without Labeled Test Sets},\n booktitle = {Proceedings of the 31st {ACM} Joint European Software Engineering\n Conference and Symposium on the Foundations of Software Engineering,\n {ESEC/FSE} 2023, San Francisco, CA, USA, December 3-9, 2023},\n pages = {107--119},\n publisher = {{ACM}},\n year = {2023},\n url = {https://doi.org/10.1145/3611643.3616258},\n doi = {10.1145/3611643.3616258},\n timestamp = {Fri, 31 May 2024 01:00:00 +0200},\n biburl = {https://dblp.org/rec/conf/sigsoft/CaoL0C23.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Can Machine Learning 
Pipelines Be Better Configured?","date":"2023","authors":["Yibo Wang","Ying Wang","Tingwei Zhang","Yue Yu","Shing-Chi Cheung","Hai Yu","Zhiliang Zhu"],"venue":"the 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering","venueShort":"ESEC/FSE","abstract":"A Machine Learning (ML) pipeline configures the workflow of a learning task using the APIs provided by ML libraries. However, a pipeline’s performance can vary significantly across different configurations of ML library versions. Misconfigured pipelines can result in inferior performance, such as poor execution time and memory usage, numeric errors and even crashes. A pipeline is subject to misconfiguration if it exhibits significantly inconsistent performance upon changes in the versions of its configured libraries or the combination of these libraries. We refer to such performance inconsistency as a pipeline configuration (PLC) issue.","tags":["ML pipeline","Compatibility Issues","Performance inconsistency","Configuration testing"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3611643.3616352","bibtex":"@inproceedings{DBLP:conf/sigsoft/WangWZ0C0023,\n author = {Yibo Wang and\n Ying Wang and\n Tingwei Zhang and\n Yue Yu and\n Shing{-}Chi Cheung and\n Hai Yu and\n Zhiliang Zhu},\n editor = {Satish Chandra and\n Kelly Blincoe and\n Paolo Tonella},\n title = {Can Machine Learning Pipelines Be Better Configured?},\n booktitle = {Proceedings of the 31st {ACM} Joint European Software Engineering\n Conference and Symposium on the Foundations of Software Engineering,\n {ESEC/FSE} 2023, San Francisco, CA, USA, December 3-9, 2023},\n pages = {463--475},\n publisher = {{ACM}},\n year = {2023},\n url = {https://doi.org/10.1145/3611643.3616352},\n doi = {10.1145/3611643.3616352},\n timestamp = {Sun, 19 Jan 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/sigsoft/WangWZ0C0023.bib},\n bibsource = {dblp computer science bibliography, 
https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Understanding the Bug Characteristics and Fix Strategies of Federated Learning Systems.","date":"2023","authors":["Xiaohu Du","Xiao Chen","Jialun Cao","Ming Wen","Shing-Chi Cheung","Hai Jin"],"venue":"the 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering","venueShort":"ESEC/FSE","abstract":"Federated learning (FL) is an emerging machine learning paradigm that aims to address the problem of isolated data islands. To preserve privacy, FL allows machine learning models and deep neural networks to be trained from decentralized data kept privately at individual devices. FL has been increasingly adopted in missioncritical fields such as finance and healthcare. However, bugs in FL systems are inevitable and may result in catastrophic consequences such as financial loss, inappropriate medical decision, and violation of data privacy ordinance. While many recent studies were conducted to understand the bugs in machine learning systems, there is no existing study to characterize the bugs arising from the unique nature of FL systems. To fill the gap, we collected 395 real bugs from six popular FL frameworks (Tensorflow Federated, PySyft, FATE, Flower, PaddleFL, and Fedlearner) in GitHub and StackOverflow, and then manually analyzed their symptoms and impacts, prone stages, root causes, and fix strategies. 
Furthermore, we report a series of findings and actionable implications that can potentially facilitate the detection of FL bugs.","tags":["Federated Learning","Bug Detection","Empirical study","Deep Learning"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3611643.3616347","bibtex":"@inproceedings{DBLP:conf/sigsoft/DuCC0C023,\n author = {Xiaohu Du and\n Xiao Chen and\n Jialun Cao and\n Ming Wen and\n Shing{-}Chi Cheung and\n Hai Jin},\n editor = {Satish Chandra and\n Kelly Blincoe and\n Paolo Tonella},\n title = {Understanding the Bug Characteristics and Fix Strategies of Federated\n Learning Systems},\n booktitle = {Proceedings of the 31st {ACM} Joint European Software Engineering\n Conference and Symposium on the Foundations of Software Engineering,\n {ESEC/FSE} 2023, San Francisco, CA, USA, December 3-9, 2023},\n pages = {1358--1370},\n publisher = {{ACM}},\n year = {2023},\n url = {https://doi.org/10.1145/3611643.3616347},\n doi = {10.1145/3611643.3616347},\n timestamp = {Mon, 23 Mar 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/sigsoft/DuCC0C023.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Multi-Constraint Adversarial Networks for Unsupervised Image-to-Image Translation.","date":"2022","authors":["Divya Saxena","Tarun Kulshrestha","Jiannong Cao","Shing-Chi Cheung"],"venue":"IEEE Transactions on Image Processing","venueShort":"TIP","abstract":"Unsupervised image-to-image translation aims to learn the mapping from an input image in a source domain to an output image in a target domain without paired training dataset. Recently, remarkable progress has been made in translation due to the development of generative adversarial networks (GANs). 
However, existing methods suffer from the training instability as gradients passing from discriminator to generator become less informative when the source and target domains exhibit sufficiently large discrepancies in appearance or shape. To handle this challenging problem, in this paper, we propose a novel multi-constraint adversarial model (MCGAN) for image translation in which multiple adversarial constraints are applied at generator's multi-scale outputs by a single discriminator to pass gradients to all the scales simultaneously and assist generator training for capturing large discrepancies in appearance between two domains. We further notice that the solution to regularize generator is helpful in stabilizing adversarial training, but results may have unreasonable structure or blurriness due to less context information flow from discriminator to generator. Therefore, we adopt dense combinations of the dilated convolutions at discriminator for supporting more information flow to generator. With extensive experiments on three public datasets, cat-to-dog, horse-to-zebra, and apple-to-orange, our method significantly improves state-of-the-arts on all datasets.","tags":["Deep Learning","Neural Networks","Generative Adversarial Networks","Image-to-Image Translation"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/TIP.2022.3144886","bibtex":"@article{DBLP:journals/tip/SaxenaKCC22,\n author = {Divya Saxena and\n Tarun Kulshrestha and\n Jiannong Cao and\n Shing{-}Chi Cheung},\n title = {Multi-Constraint Adversarial Networks for Unsupervised Image-to-Image\n Translation},\n journal = {{IEEE} Trans. 
Image Process.},\n volume = {31},\n pages = {1601--1612},\n year = {2022},\n url = {https://doi.org/10.1109/TIP.2022.3144886},\n doi = {10.1109/TIP.2022.3144886},\n timestamp = {Wed, 23 Feb 2022 00:00:00 +0100},\n biburl = {https://dblp.org/rec/journals/tip/SaxenaKCC22.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"SemMT: A Semantic-Based Testing Approach for Machine Translation Systems.","date":"2020-01-01","authors":["Jialun Cao","Meiziniu Li","Yeting Li","Ming Wen","Shing-Chi Cheung","Haiming Chen"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","abstract":"Machine translation has wide applications in daily life. In mission-critical applications such as translating official documents, incorrect translation can have unpleasant or sometimes catastrophic consequences. This motivates recent research on the testing methodologies for machine translation systems. Existing methodologies mostly rely on metamorphic relations designed at the textual level (e.g., Levenshtein distance) or syntactic level (e.g., distance between grammar structures) to determine the correctness of translation results. However, these metamorphic relations do not consider whether the original and the translated sentences have the same meaning (i.e., semantic similarity). To address this problem, in this article we propose SemMT, an automatic testing approach for machine translation systems based on semantic similarity checking. SemMT applies round-trip translation and measures the semantic similarity between the original and the translated sentences. Our insight is that the semantics concerning logical relations and quantifiers in sentences can be captured by regular expressions (or deterministic finite automata) where efficient semantic equivalence/similarity checking algorithms can be applied. 
Leveraging the insight, we propose three semantic similarity metrics and implement them in SemMT. We compared SemMT with related state-of-the-art testing techniques, demonstrating the effectiveness of mistranslation detection. The experiment results show that SemMT outperforms existing metrics, achieving an increase of 34.2% and 15.4% on accuracy and F-score, respectively. We also study the possibility of further enhancing the performance by combining various metrics. Finally, we discuss a solution to locate the suspicious trip in round-trip translation, which provides hints for bug diagnosis.","tags":["Metamorphic Testing","Machine Translation","Semantic Similarity","Regular expression"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3490488","bibtex":"@article{DBLP:journals/tosem/CaoLLWCC22,\n author = {Jialun Cao and\n Meiziniu Li and\n Yeting Li and\n Ming Wen and\n Shing{-}Chi Cheung and\n Haiming Chen},\n title = {SemMT: {A} Semantic-Based Testing Approach for Machine Translation\n Systems},\n journal = {{ACM} Trans. Softw. Eng. Methodol.},\n volume = {31},\n number = {2},\n pages = {34e:1--34e:36},\n year = {2022},\n url = {https://doi.org/10.1145/3490488},\n doi = {10.1145/3490488},\n timestamp = {Thu, 22 May 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/tosem/CaoLLWCC22.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"APER: Evolution-Aware Runtime Permission Misuse Detection for Android Apps.","date":"2022","authors":["Sinan Wang","Yibo Wang","Xian Zhan","Ying Wang","Yepang Liu","Xiapu Luo","Shing-Chi Cheung"],"venue":"44th IEEE/ACM 44th International Conference on Software Engineering","venueShort":"ICSE","abstract":"The Android platform introduces the runtime permission model in version 6.0. The new model greatly improves data privacy and user experience, but brings new challenges for app developers. 
First, it allows users to freely revoke granted permissions. Hence, developers cannot assume that the permissions granted to an app would keep being granted. Instead, they should make their apps carefully check the permission status before invoking dangerous APIs. Second, the permission specification keeps evolving, bringing new types of compatibility issues into the ecosystem. To understand the impact of the challenges, we conducted an empirical study on 13,352 popular Google Play apps. We found that 86.0% apps used dangerous APIs asynchronously after permission management and 61.2% apps used evolving dangerous APIs. If an app does not properly handle permission revocations or platform differences, unexpected runtime issues may happen and even cause app crashes. We call such Android Runtime Permission issues as ARP bugs. Unfortunately, existing runtime permission issue detection tools cannot effectively deal with the ARP bugs induced by asynchronous permission management and permission specification evolution. To fill the gap, we designed a static analyzer, Aper, that performs reaching definition and dominator analysis on Android apps to detect the two types of ARP bugs. To compare Aper with existing tools, we built a benchmark, ARPfix, from 60 real ARP bugs. Our experiment results show that Aper significantly outperforms two academic tools, ARPDroid and RevDroid, and an industrial tool, Lint, on ARPfix, with an average improvement of 46.3% on F1-score. In addition, Aper successfully found 34 ARP bugs in 214 open-source Android apps, most of which can result in abnormal app behaviors (such as app crashes) according to our manual validation. We reported these bugs to the app developers. 
So far, 17 bugs have been confirmed and seven have been fixed.","tags":["Android","Empirical study","Security","Program Analysis"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3510003.3510074","bibtex":"@inproceedings{DBLP:conf/icse/WangWZWLLC22,\n author = {Sinan Wang and\n Yibo Wang and\n Xian Zhan and\n Ying Wang and\n Yepang Liu and\n Xiapu Luo and\n Shing{-}Chi Cheung},\n title = {{APER:} Evolution-Aware Runtime Permission Misuse Detection for Android\n Apps},\n booktitle = {44th {IEEE/ACM} 44th International Conference on Software Engineering,\n {ICSE} 2022, Pittsburgh, PA, USA, May 25-27, 2022},\n pages = {125--137},\n publisher = {{ACM}},\n year = {2022},\n url = {https://doi.org/10.1145/3510003.3510074},\n doi = {10.1145/3510003.3510074},\n timestamp = {Sun, 19 Jan 2025 13:14:40 +0100},\n biburl = {https://dblp.org/rec/conf/icse/WangWZWLLC22.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"DeepFD: Automated Fault Diagnosis and Localization for Deep Learning Programs.","date":"2022","authors":["Jialun Cao","Meiziniu Li","Xiao Chen","Ming Wen","Yongqiang Tian","Bo Wu","Shing-Chi Cheung"],"venue":"44th IEEE/ACM 44th International Conference on Software Engineering","venueShort":"ICSE","abstract":"As Deep Learning (DL) systems are widely deployed for mission-critical applications, debugging such systems becomes essential. Most existing works identify and repair suspicious neurons on the trained Deep Neural Network (DNN), which, unfortunately, might be a detour. Specifically, several existing studies have reported that many unsatisfactory behaviors are actually originated from the faults residing in DL programs. Besides, locating faulty neurons is not actionable for developers, while locating the faulty statements in DL programs can provide developers with more useful information for debugging. 
Though a few recent studies were proposed to pinpoint the faulty statements in DL programs or the training settings (e.g. too large learning rate), they were mainly designed based on predefined rules, leading to many false alarms or false negatives, especially when the faults are beyond their capabilities.","tags":["Deep Learning","Fault Detection","Program Analysis","Neural Networks"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3510003.3510099","bibtex":"@inproceedings{DBLP:conf/icse/CaoLC0TWC22,\n author = {Jialun Cao and\n Meiziniu Li and\n Xiao Chen and\n Ming Wen and\n Yongqiang Tian and\n Bo Wu and\n Shing{-}Chi Cheung},\n title = {DeepFD: Automated Fault Diagnosis and Localization for Deep Learning\n Programs},\n booktitle = {44th {IEEE/ACM} 44th International Conference on Software Engineering,\n {ICSE} 2022, Pittsburgh, PA, USA, May 25-27, 2022},\n pages = {573--585},\n publisher = {{ACM}},\n year = {2022},\n url = {https://doi.org/10.1145/3510003.3510099},\n doi = {10.1145/3510003.3510099},\n timestamp = {Tue, 24 Mar 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/icse/CaoLC0TWC22.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Nufix: Escape From NuGet Dependency Maze.","date":"2022","authors":["Zhenming Li","Ying Wang","Zeqi Lin","Shing-Chi Cheung","Jian-Guang Lou"],"venue":"44th IEEE/ACM 44th International Conference on Software Engineering","venueShort":"ICSE","abstract":"Developers usually suffer from dependency maze (DM) issues, i.e., package dependency constraints are violated when a project's platform or dependencies are changed. This problem is especially serious in .NET ecosystem due to its fragmented platforms (e.g., .NET Framework, .NET Core, and .NET Standard). 
Fixing DM issues is challenging due to the complexity of dependency constraints: multiple DM issues often occur in one project; solving one DM issue usually causes another DM issue cropping up; the exponential search space of possible dependency combinations is also a barrier.","tags":["Dependency Management","Compatibility Issues","Third-Party Libraries","Empirical study"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3510003.3510118","bibtex":"@inproceedings{DBLP:conf/icse/LiWLCL22,\n author = {Zhenming Li and\n Ying Wang and\n Zeqi Lin and\n Shing{-}Chi Cheung and\n Jian{-}Guang Lou},\n title = {Nufix: Escape From NuGet Dependency Maze},\n booktitle = {44th {IEEE/ACM} 44th International Conference on Software Engineering,\n {ICSE} 2022, Pittsburgh, PA, USA, May 25-27, 2022},\n pages = {1545--1557},\n publisher = {{ACM}},\n year = {2022},\n url = {https://doi.org/10.1145/3510003.3510118},\n doi = {10.1145/3510003.3510118},\n timestamp = {Tue, 02 Aug 2022 01:00:00 +0200},\n biburl = {https://dblp.org/rec/conf/icse/LiWLCL22.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Insight: Exploring Cross-Ecosystem Vulnerability Impacts.","date":"2022","authors":["Meiqiu Xu","Ying Wang","Shing-Chi Cheung","Hai Yu","Zhiliang Zhu"],"venue":"37th IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","abstract":"Vulnerabilities, referred to as CLV issues, are induced by cross-language invocations of vulnerable libraries. Such issues greatly increase the attack surface of Python/Java projects due to their pervasive use of C libraries. Existing Python/Java build tools in PyPI and Maven ecosystems fail to report the dependency on vulnerable libraries written in other languages such as C. CLV issues are easily missed by developers. In this paper, we conduct the first empirical study on the status quo of CLV issues in PyPI and Maven ecosystems. 
It is found that 82,951 projects in these ecosystems are directly or indirectly dependent on libraries compiled from the C project versions that are identified to be vulnerable in CVE reports. Our study arouses the awareness of CLV issues in popular ecosystems and presents related analysis results.","tags":["Vulnerability","Third-Party Libraries","Empirical study","Dependency Management"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3551349.3556921","bibtex":"@inproceedings{DBLP:conf/kbse/XuWCY022,\n author = {Meiqiu Xu and\n Ying Wang and\n Shing{-}Chi Cheung and\n Hai Yu and\n Zhiliang Zhu},\n title = {Insight: Exploring Cross-Ecosystem Vulnerability Impacts},\n booktitle = {37th {IEEE/ACM} International Conference on Automated Software Engineering,\n {ASE} 2022, Rochester, MI, USA, October 10-14, 2022},\n pages = {58:1--58:13},\n publisher = {{ACM}},\n year = {2022},\n url = {https://doi.org/10.1145/3551349.3556921},\n doi = {10.1145/3551349.3556921},\n timestamp = {Sun, 19 Jan 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/kbse/XuWCY022.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"RegexScalpel: Regular Expression Denial of Service (ReDoS) Defense by Localize-and-Fix.","date":"2022","authors":["Yeting Li","Yecheng Sun","Zhiwu Xu","Jialun Cao","Yuekang Li","Rongchen Li","Haiming Chen","Shing-Chi Cheung","Yang Liu","Yang Xiao"],"venue":"31st USENIX Security Symposium","venueShort":"USENIX Security","abstract":null,"tags":[],"arxivUrl":null,"paperUrl":"https://www.usenix.org/conference/usenixsecurity22/presentation/li-yeting","bibtex":"@inproceedings{DBLP:conf/uss/LiS0CLLCC0X22,\n author = {Yeting Li and\n Yecheng Sun and\n Zhiwu Xu and\n Jialun Cao and\n Yuekang Li and\n Rongchen Li and\n Haiming Chen and\n Shing{-}Chi Cheung and\n Yang Liu and\n Yang Xiao},\n editor = {Kevin R. B. 
Butler and\n Kurt Thomas},\n title = {RegexScalpel: Regular Expression Denial of Service (ReDoS) Defense\n by Localize-and-Fix},\n booktitle = {31st {USENIX} Security Symposium, {USENIX} Security 2022, Boston,\n MA, USA, August 10-12, 2022},\n pages = {4183--4200},\n publisher = {{USENIX} Association},\n year = {2022},\n url = {https://www.usenix.org/conference/usenixsecurity22/presentation/li-yeting},\n timestamp = {Thu, 22 May 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/conf/uss/LiS0CLLCC0X22.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Characterizing Transaction-Reverting Statements in Ethereum Smart Contracts.","date":"2021","authors":["Lu Liu","Lili Wei","Wuqi Zhang","Ming Wen","Yepang Liu","Shing-Chi Cheung"],"venue":"36th IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","abstract":"Smart contracts are programs stored on blockchains to execute transactions. When input constraints or security properties are violated at runtime, the transaction being executed by a smart contract needs to be reverted to avoid undesirable consequences. On Ethereum, the most popular blockchain that supports smart contracts, developers can choose among three transaction-reverting statements (i.e., require, if…revert, and if…throw) to handle anomalous transactions. While these transaction-reverting statements are vital for preventing smart contracts from exhibiting abnormal behaviors or suffering malicious attacks, there is limited understanding of how they are used in practice. In this work, we perform the first empirical study to characterize transaction-reverting statements in Ethereum smart contracts. We measured the prevalence of these statements in 3,866 verified smart contracts from popular dapps and built a taxonomy of their purposes via manually analyzing 557 transaction-reverting statements. 
We also compared template contracts and their corresponding custom contracts to understand how developers customize the use of transaction-reverting statements. Finally, we analyzed the security impact of transaction-reverting statements by removing them from smart contracts and comparing the mutated contracts against the original ones. Our study led to important findings. For example, we found that transaction-reverting statements are commonly used to perform seven types of authority verifications or validity checks, and missing such statements may compromise the security of smart contracts. We also found that current smart contract security analyzers cannot effectively handle transaction-reverting statements when detecting security vulnerabilities. Our findings can shed light on further research in the broad area of smart contract quality assurance and provide practical guidance to smart contract developers on the appropriate use of transaction-reverting statements.","tags":["Smart Contracts","Empirical study","Security","Vulnerability"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/ASE51524.2021.9678597","bibtex":"@inproceedings{DBLP:conf/kbse/LiuWZ00C21,\n author = {Lu Liu and\n Lili Wei and\n Wuqi Zhang and\n Ming Wen and\n Yepang Liu and\n Shing{-}Chi Cheung},\n title = {Characterizing Transaction-Reverting Statements in Ethereum Smart\n Contracts},\n booktitle = {36th {IEEE/ACM} International Conference on Automated Software Engineering,\n {ASE} 2021, Melbourne, Australia, November 15-19, 2021},\n pages = {630--641},\n publisher = {{IEEE}},\n year = {2021},\n url = {https://doi.org/10.1109/ASE51524.2021.9678597},\n doi = {10.1109/ASE51524.2021.9678597},\n timestamp = {Fri, 16 May 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/conf/kbse/LiuWZ00C21.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Workflow Refactoring for Maximizing Concurrency and 
Block-Structuredness","date":"2021","authors":["Wei Song","Hans-Arno Jacobsen","Shing-Chi Cheung","Hongyu Liu","Xiaoxing Ma"],"venue":"IEEE Trans. Serv. Comput.","venueShort":"TSC","tags":["Workflow refactoring","activity dependence","concurrency maximization","block-structuredness","synchronization links"],"abstract":"\nIn the era of Internet and big data, contemporary workflows become increasingly large in scale and complex in structure, introducing greater challenges for workflow modeling. Workflows are not with maximized concurrency and block-structuredness in terms of control flow, though languages supporting block-structuredness (e.g., BPEL) are employed. Existing workflow refactoring approaches mostly focus on maximizing concurrency according to dependences between activities, but do not consider the block-structuredness of the refactored workflow. It is easier to comprehend and analyze a workflow that is block-structured and to transform it into BPEL-like processes. In this paper, we aim at maximizing both concurrency and block-structuredness. Nevertheless, not all workflows can be refactored with a block-structured representation, and it is intractable to make sure that the refactored workflows are as block-structured as possible. We first define a well-formed dependence pattern of activities. The control flow among the activities in this pattern can be represented in block-structured forms with maximized concurrency. Then, we propose a greedy heuristics-based graph reduction approach to recursively find such patterns. In this way, the resulting workflow is with maximized concurrency and its block-structuredness approximates optimality. 
We show the effectiveness and efficiency of our approach with real-world scientific workflows.\n ","projectUrl":null,"paperUrl":null,"slidesUrl":null,"bibtex":"@article{DBLP:journals/tsc/SongJCLM21,\n author = {Wei Song and\n Hans{-}Arno Jacobsen and\n Shing{-}Chi Cheung and\n Hongyu Liu and\n Xiaoxing Ma},\n title = {Workflow Refactoring for Maximizing Concurrency and Block-Structuredness},\n journal = {{IEEE} Trans. Serv. Comput.},\n volume = {14},\n number = {4},\n pages = {1224--1237},\n year = {2021},\n url = {https://doi.org/10.1109/TSC.2018.2867593},\n doi = {10.1109/TSC.2018.2867593},\n timestamp = {Thu, 12 Aug 2021 17:51:00 +0200},\n biburl = {https://dblp.org/rec/journals/tsc/SongJCLM21.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"awards":[]},{"title":"Sifter: A Service Isolation Strategy for Internet Applications","date":"2021","authors":["Chunyang Ye","Shing-Chi Cheung","Wing Kwong Chan"],"venue":"IEEE Trans. Serv. Comput.","venueShort":"TSC","tags":["Atomicity sphere","behavior constraint","exception handling","implicit interaction","isolation","web service"],"abstract":"\nService oriented architecture (SOA) provides a flexible platform to build collaborative Internet applications by composing existing self-contained and autonomous services. However, the implicit interactions among the concurrently provisioned services may introduce interference to Internet applications and cause them behave abnormally. It is thus desirable to isolate services to safeguard their application consistency. Existing approaches mostly address this problem by restricting concurrent execution of services to avoid all the implicit interactions. These approaches, however, compromise the performance and flexibility of Internet applications due to the long running nature of services. This paper presents Sifter, a new service isolation strategy for Internet applications. 
We devise in this strategy a novel static approach to analyze the potential implicit interactions among the services and their impacts on the consistency of the associated Internet applications. By locating only those afflicted implicit interactions that may violate the application consistency, a novel approach based on exception handling and behavior constraints is customized to involved services to eliminate their impacts. We show that this approach exempts the consistency property of Internet applications from being interfered at runtime. The experimental results show that our approach has a better performance than existing solutions.\n ","projectUrl":null,"paperUrl":null,"slidesUrl":null,"bibtex":"@article{DBLP:journals/tsc/YeCC21,\n author = {Chunyang Ye and\n Shing{-}Chi Cheung and\n Wing Kwong Chan},\n title = {Sifter: {A} Service Isolation Strategy for Internet Applications},\n journal = {{IEEE} Trans. Serv. Comput.},\n volume = {14},\n number = {5},\n pages = {1545--1557},\n year = {2021},\n url = {https://doi.org/10.1109/TSC.2018.2876254},\n doi = {10.1109/TSC.2018.2876254},\n timestamp = {Wed, 03 Nov 2021 08:27:31 +0100},\n biburl = {https://dblp.org/rec/journals/tsc/YeCC21.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"awards":[]},{"title":"ContractGuard: Defend Ethereum Smart Contracts with Embedded Intrusion Detection","date":"2020","authors":["Xinming Wang","Jiahao He","Zhijian Xie","Gansen Zhao","Shing-Chi Cheung"],"venue":"IEEE Trans. Serv. Comput.","venueShort":"TSC","tags":["Blockchain","Smart Contracts","Program Analysis","Security"],"abstract":"\nEthereum smart contracts are programs that can be collectively executed by a network of mutually untrusted nodes. Smart contracts handle and transfer assets of values, offering strong incentives for malicious attacks. Intrusion attacks are a popular type of malicious attacks. 
In this article, we propose ContractGuard, the first intrusion detection system (IDS) to defend Ethereum smart contracts against such attacks. Like IDSs for conventional programs, ContractGuard detects intrusion attempts as abnormal control flow. However, existing IDS techniques/tools are inapplicable to Ethereum smart contracts due to Ethereum's decentralized nature and its highly restrictive execution environment. To address these issues, we design ContractGuard by embedding it in the contracts to profile context-tagged acyclic paths, and optimizing it under the Ethereum gas-oriented performance model. The main goal is to minimize the overheads, to which the users will be extremely sensitive since the cost needs to be paid upfront in digital concurrency. Empirical investigation using real-life contracts deployed in the Ethereum mainnet shows that on average, ContractGuard only adds to 36.14 percent of the deployment overhead and 28.27 percent of the runtime overhead. Furthermore, we conducted controlled experiments and show that ContractGuard successfully guard against attacks on all real-world vulnerabilities and 83 percent of the seeded vulnerabilities.\n ","projectUrl":null,"paperUrl":null,"slidesUrl":null,"bibtex":"@article{DBLP:journals/tsc/WangHXZC20,\n author = {Xinming Wang and\n Jiahao He and\n Zhijian Xie and\n Gansen Zhao and\n Shing{-}Chi Cheung},\n title = {ContractGuard: Defend Ethereum Smart Contracts with Embedded Intrusion\n Detection},\n journal = {{IEEE} Trans. Serv. 
Comput.},\n volume = {13},\n number = {2},\n pages = {314--328},\n year = {2020},\n url = {https://doi.org/10.1109/TSC.2019.2949561},\n doi = {10.1109/TSC.2019.2949561},\n timestamp = {Fri, 22 May 2020 21:56:08 +0200},\n biburl = {https://dblp.org/rec/journals/tsc/WangHXZC20.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"awards":[]},{"title":"Detecting numerical bugs in neural network architectures","date":"2020","authors":["Yuhao Zhang","Luyao Ren","Liqian Chen","Yingfei Xiong","Shing-Chi Cheung","Tao Xie"],"venue":"Proceedings of the 28th ACM Joint European SoftwareEngineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE ’20)","venueShort":"ESEC/FSE","tags":["Neural Networks","Formal Software Verification","Program Analysis"],"awards":["Distinguished Paper"],"abstract":"\nDetecting bugs in deep learning software at the architecture level provides additional benefits that detecting bugs at the model level does not provide. This paper makes the first attempt to conduct static analysis for detecting numerical bugs at the architecture level. We propose a static analysis approach for detecting numerical bugs in neural architectures based on abstract interpretation. Our approach mainly comprises two kinds of abstraction techniques, i.e., one for tensors and one for numerical values. Moreover, to scale up while maintaining adequate detection precision, we propose two abstraction techniques: tensor partitioning and (elementwise) affine relation analysis to abstract tensors and numerical values, respectively. We realize the combination scheme of tensor partitioning and affine relation analysis (together with interval analysis) as DEBAR, and evaluate it on two datasets: neural architectures with known bugs (collected from existing studies) and real-world neural architectures. 
The evaluation results show that DEBAR outperforms other tensor and numerical abstraction techniques on accuracy without losing scalability. DEBAR successfully detects all known numerical bugs with no false positives within 1.7–2.3 seconds per architecture. On the real-world architectures, DEBAR reports 529 warnings within 2.6–135.4 seconds per architecture, where 299 warnings are true positives.\n ","projectUrl":null,"paperUrl":null,"slidesUrl":null,"bibtex":"@inproceedings{DBLP:conf/sigsoft/ZhangRC0C020,\n author = {Yuhao Zhang and\n Luyao Ren and\n Liqian Chen and\n Yingfei Xiong and\n Shing{-}Chi Cheung and\n Tao Xie},\n editor = {Prem Devanbu and\n Myra B. Cohen and\n Thomas Zimmermann},\n title = {Detecting numerical bugs in neural network architectures},\n booktitle = {{ESEC/FSE} '20: 28th {ACM} Joint European Software Engineering Conference\n and Symposium on the Foundations of Software Engineering, Virtual\n Event, USA, November 8-13, 2020},\n pages = {826--837},\n publisher = {{ACM}},\n year = {2020},\n url = {https://doi.org/10.1145/3368089.3409720},\n doi = {10.1145/3368089.3409720},\n timestamp = {Tue, 10 Nov 2020 10:58:23 +0100},\n biburl = {https://dblp.org/rec/conf/sigsoft/ZhangRC0C020.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null},{"title":"Boosting automated program repair with bug-inducing commits","date":"2020","authors":["Ming Wen","Yepang Liu","Shing-Chi Cheung"],"venue":"42nd International Conference on Software Engineering (NIER Track)","venueShort":"ICSE NIER","tags":["Program Repair"],"abstract":"\nThe search space explosion problem is a long-standing challenge for search-based automated program repair (APR). The operation space, which defines how to select appropriate mutation operators, and the ingredient space, which defines how to select appropriate code elements as fixing ingredients, are two major factors that determine the search space. 
Conventional approaches mainly devise fixing strategies via learning from frequent fixing patterns based on substantial patches collected from open-source projects. In this paper, we propose a new direction for search-based APR, that is to repair a bug via learning from how the bug was introduced instead of learning from how other bugs are frequently fixed. Our empirical study reveals that substantial mutation operators and fixing ingredients required to fix a bug can be inferred from the commit that introduced the bug. Based on the findings of our empirical study, we devised a preliminary fixing strategy based on bug-inducing commits, which is able to repair 8 new bugs that cannot be repaired by the state-of-the-art techniques. Such results demonstrate that our proposed new idea for searched-based APR is promising.\n ","projectUrl":null,"paperUrl":null,"slidesUrl":null,"bibtex":"@inproceedings{DBLP:conf/icse/Wen0C20,\n author = {Ming Wen and\n Yepang Liu and\n Shing{-}Chi Cheung},\n editor = {Gregg Rothermel and\n Doo{-}Hwan Bae},\n title = {Boosting automated program repair with bug-inducing commits},\n booktitle = {{ICSE-NIER} 2020: 42nd International Conference on Software Engineering,\n New Ideas and Emerging Results, Seoul, South Korea, 27 June - 19 July,\n 2020},\n pages = {77--80},\n publisher = {{ACM}},\n year = {2020},\n url = {https://doi.org/10.1145/3377816.3381743},\n doi = {10.1145/3377816.3381743},\n timestamp = {Mon, 03 May 2021 16:42:27 +0200},\n biburl = {https://dblp.org/rec/conf/icse/Wen0C20.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"awards":[]},{"title":"MR-Scout: Automated Synthesis of Metamorphic Relations from Existing Test Cases","date":"2024-06-29","authors":["Congying Xu","Valerio Terragni","Hengcheng Zhu","Jiarong Wu","Shing-Chi Cheung"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","tags":["Metamorphic 
Testing"],"projectUrl":"https://mr-scout.github.io/","paperUrl":"https://dl.acm.org/doi/abs/10.1145/3656340","arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"MR-Adopt: Automatic Deduction of Input Transformation Function for Metamorphic Testing","date":"2024-09-01","authors":["Congying Xu","Songqiang Chen","Jiarong Wu","Shing-Chi Cheung","Valerio Terragni","Hengcheng Zhu","Jialun Cao"],"venue":"IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","tags":["LLM","Metamorphic Testing"],"projectUrl":"https://mr-adopt.github.io/","paperUrl":"https://arxiv.org/abs/2408.15815","arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"Automatic build repair for test cases using incompatible Java versions","date":"2024","authors":["Ching Hang Mak","Shing-Chi Cheung"],"venue":"Information and Software Technology","venueShort":"INFSOF","tags":["Java","Program Analysis","Program Repair","Third-Party Libraries"],"abstract":"\n Context:\n Bug bisection is a common technique used to identify a revision that introduces a bug or indirectly fixes a bug, and often involves executing multiple revisions of a project to determine whether the bug is present within the revision. However, many legacy revisions often cannot be successfully compiled due to changes in the programming language or tools used in the compilation process, adding complexity and preventing automation in the bisection process.\n \n Objective:\n In this paper, we introduce an approach to repair test cases of Java projects by performing dependency minimization. Our approach aims to remove classes and methods that are not required for the execution of one or more test cases. 
Unlike existing state-of-the-art techniques, our approach performs minimization at source-level, which allows compile-time errors to be fixed.\n \n Method:\n A standalone Java tool implementing our technique was developed, and we evaluated our technique using subjects from Defects4J retargeted against Java 8 and 17.\n \n Results:\n Our evaluation showed that a majority of subjects can be repaired solely by performing minimization, including replicating the test results of the original version. Furthermore, our technique is also shown to achieve accurate minimized results, while only adding a small overhead to the bisection process.\n \n Conclusion:\n Our proposed technique is shown to be effective for repairing build failures with minimal overhead, making it suitable for use in automated bug bisection. Our tool can also be adapted for use cases such as bug corpus creation and refactoring. \n ","projectUrl":"https://github.com/Derppening/test-dependency-minimization/","arxivUrl":"https://arxiv.org/abs/2404.17818","bibtex":"@article{mak2024automatic,\n title={Automatic build repair for test cases using incompatible java versions},\n author={Mak, Ching Hang and Cheung, Shing-Chi},\n journal={Information and Software Technology},\n pages={107473},\n year={2024},\n publisher={Elsevier}\n }","paperUrl":null,"slidesUrl":null,"awards":[]},{"title":"CINA: Suppressing the Detection of Unstable Context Inconsistency","date":"2015","authors":["Chang Xu","Wang Xi","Shing-Chi Cheung","Xiaoxing Ma","Chun Cao","Jian Lu"],"venue":"IEEE Transactions on Software Engineering 41(9), September 2015","venueShort":"TSE","tags":[],"abstract":"\n Context-aware applications adapt their behavior based on contexts. Contexts can, however, be incorrect. A popular means to build dependable applications is to augment them with a set of constraints to govern the consistency of context values. 
These constraints are evaluated upon context changes to detect inconsistencies so that they can be timely handled. However, we observe that many context inconsistencies are unstable. They vanish by themselves and do not require handling. Such inconsistencies are detected due to misaligned sensor sampling or improper inconsistency detection scheduling. We call them unstable context inconsistencies (or STINs). STINs should be avoided to prevent unnecessary inconsistency handling and unstable behavioral adaptation to applications. In this article, we study STINs systematically, from examples to theoretical analysis, and present algorithms to suppress their detection. Our key insight is that only certain patterns of context changes can make a consistency constraint subject to the detection of STINs. We derive such patterns and proactively use them to suppress the detection of STINs. We implemented our idea and applied it to real-world applications. Experimental results confirmed its effectiveness in suppressing the detection of numerous STINs with negligible overhead, while preserving the detection of stable context inconsistencies that require inconsistency handling.\n ","paperUrl":"https://www.computer.org/csdl/trans/ts/2015/09/07078871-abs.html","bibtex":"@article{DBLP:journals/tse/XuXCMCL15,\n author = {Chang Xu and\n Wang Xi and\n Shing{-}Chi Cheung and\n Xiaoxing Ma and\n Chun Cao and\n Jian Lu},\n title = {Cina: Suppressing the Detection of Unstable Context Inconsistency},\n journal = {{IEEE} Trans. 
Software Eng.},\n volume = {41},\n number = {9},\n pages = {842--865},\n year = {2015},\n url = {http://dx.doi.org/10.1109/TSE.2015.2418760},\n doi = {10.1109/TSE.2015.2418760},\n timestamp = {Thu, 10 Dec 2015 11:33:07 +0100},\n biburl = {http://dblp.uni-trier.de/rec/bib/journals/tse/XuXCMCL15},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Sifter: A Service Isolation Strategy for Internet Applications","date":"2019","authors":["Chunyang Ye","Shing-Chi Cheung","W.K. Chan"],"venue":"IEEE Transactions on Services Computing 2019","venueShort":"TSC","tags":[],"abstract":"\n Service oriented architecture (SOA) provides a flexible platform to build collaborative Internet applications by composing existing self-contained and autonomous services. However, the implicit interactions among the concurrently provisioned services may introduce interference to Internet applications and cause them behave abnormally. It is thus desirable to isolate services to safeguard their application consistency. Existing approaches mostly address this problem by restricting concurrent execution of services to avoid all the implicit interactions. These approaches, however, compromise the performance and flexibility of Internet applications due to the long running nature of services. This paper presents Sifter, a new service isolation strategy for Internet applications. We devise in this strategy a novel static approach to analyze the potential implicit interactions among the services and their impacts on the consistency of the associated Internet applications. By locating only those afflicted implicit interactions that may violate the application consistency, a novel approach based on exception handling and behavior constraints is customized to involved services to eliminate their impacts. 
We show that this approach exempts the consistency property of Internet applications from being interfered at runtime. The experimental results show that our approach has a better performance than existing solutions.\n ","paperUrl":"materials/TSC-cyye.pdf","bibtex":"@ARTICLE{8493286,\nauthor={C. Ye and Shing-Chi Cheung and W. K. Chan},\njournal={IEEE Transactions on Services Computing},\ntitle={Sifter: A Service Isolation Strategy for Internet Applications},\nyear={2019},\nvolume={},\nnumber={},\npages={1-1},\nmonth={},}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Testing Multithreaded Programs via Thread Speed Control","date":"2018","authors":["Dongjie Chen","Yanyan Jiang","Chang Xu","Xiaoxing Ma","Jian Lu"],"venue":"26th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE 2018), Lake Buena Vista, Florida, USA, Nov 2018","venueShort":"ESEC/FSE","tags":[],"abstract":"\n Intensive dependencies of a Java project on third-party libraries can easily lead to the presence of multiple library or class versions on its classpath. When this happens, JVM will load one version and shadows the others. Dependency conflict (DC) issues occur when the loaded version fails to cover a required feature (e.g., method) referenced by the project, thus causing runtime exceptions. However, the warnings of duplicate classes or libraries detected by existing build tools such as Maven can be benign since not all instances of duplication will induce runtime exceptions, and hence are often ignored by developers. In this paper, we conducted an empirical study on real-world DC issues collected from large open source projects. We studied the manifestation and fixing patterns of DC issues. Based on our findings, we designed Decca, an automated detection tool that assesses DC issues' severity and filters out the benign ones. 
Our evaluation results on 30 projects show that Decca achieves a precision of 0.923 and recall of 0.766 in detecting high-severity DC issues. Decca also detected new DC issues in these projects. Subsequently, 20 DC bug reports were filed, and 11 of them were confirmed by developers. Issues in 6 reports were fixed with our suggested patches.\n ","paperUrl":"https://cs.nju.edu.cn/changxu/1_publications/ESECFSE18.pdf","projectUrl":"https://midwinter1993.github.io/Schnauzer/","bibtex":"@inproceedings{chen_testing_2018,\n author = {Dongjie Chen and Yanyan Jiang and Chang Xu and Xiaoxing Ma and Jian Lu},\n title = {Testing multithreaded programs via thread speed control},\n pages = {to appear},\n year = {2018},\n booktitle = {Proceedings of the 26th Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE)},\n pdf = {/spar/publication/chen_testing_2018.pdf},\n code = {https://midwinter1993.github.io/Schnauzer/},\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Understanding and Detecting Callback Compatibility Issues for Android Applications","date":"2018","authors":["Huaxun Huang","Lili Wei","Yepang Liu","Shing-Chi Cheung"],"venue":"2018 33rd ACM/IEEE International Conference on Automated Software Engineering (ASE '18), September 2018, Montpellier, France","venueShort":"ASE","tags":[],"abstract":"\n The control flows of Android apps are largely driven by the protocols that govern how callback APIs are invoked in response to various event.\nWhen these callback APIs evolve along with the Android framework, the changes in their invocation protocols can induce unexpected control flows to existing Android apps, causing various compatibility issues. 
We refer to these issues as callback compatibility issues.\nWhile Android framework updates have received due attention, little is known about their impacts on app control flows and the callback compatibility issues thus induced.\nTo bridge the gap, we examined Android documentations\nand conducted an empirical study on 100 real-world callback compatibility issues\nto investigate how these issues were induced by callback API evolutions. \nBased on our empirical findings, we propose a graph-based model to capture the control flow inconsistencies caused by API evolutions and devise a static analysis technique, CIDER, to detect callback compatibility issues.\nOur evaluation of CIDER on 20 popular open-source Android apps shows that CIDER is effective. It detected 13 new callback compatibility issues in these apps, among which 12 issues were confirmed and 9 issues were fixed.\n ","paperUrl":"materials/callback.pdf","projectUrl":"https://cideranalyzer.github.io/","bibtex":"@inproceedings {ASE18,\n title = {{Understanding and Detecting Callback Compatibility Issues for Android Applications}},\n author = {Huaxun Huang, Lili Wei, Yepang Liu, Shing-Chi Cheung},\n booktitle = {Proceedings of the 2018 33rd ACM/IEEE International Conference on Automated Software Engineering, {ASE} 2018},\n year = {2018},\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"A Tale of Two Cities: How WebView Induces Bugs to Android Applications","date":"2018","authors":["Jiajun Hu","Lili Wei","Yepang Liu","Shing-Chi Cheung","Huaxun Huang"],"venue":"Proceedings of 2018 33rd ACM/IEEE International Conference on Automated Software Engineering (ASE'18), September 3-7, 2018, Montpellier, France","venueShort":"ASE","tags":[],"abstract":"\n WebView is a widely used Android component that augments a native app with web browser capabilities. It eases the interactions between an app’s native code and web code. However, the interaction mechanism of WebView induces new types of bugs in Android apps. 
Understanding the characteristics and manifestation of these WebView-induced bugs (ωBugs for short) facilitates the correct usages of WebViews in Android apps. This motivates us to conduct the first empirical study on ωBugs based on those found in popular open-source Android apps. Our study identified the major root causes and consequences of ωBugs and made interesting observations that can be leveraged for detecting and diagnosing ωBugs. Based on the empirical study, we further propose an automated testing technique ωDroid to effectively expose ωBugs in Android apps. In our experiments, ωDroid successfully discovered 30 unique and previously-unknown ωBugs when applied to 146 open-source Android apps. We reported the 30 ωBugs to the corresponding app developers. Out of these 30 ωBugs, 14 were confirmed and 7 of them were fixed. This shows that ωDroid can effectively detect ωBugs that are of the developers’ concern.\n ","paperUrl":"materials/wDroid.pdf","projectUrl":"http://home.cse.ust.hk/~jhuao/wDroid.html","bibtex":"@inproceedings {ASE18Hu,\n title = {{A Tale of Two Cities: How WebView Induces Bugs to Android Applications}},\n author = {Jiajun Hu, Lili Wei, Yepang Liu, Shing-Chi Cheung, Huaxun Huang},\n booktitle = {{Proceedings of the 2018 33rd ACM/IEEE International Conference on Automated Software Engineering (ASE’18)}},\n year = {2018},\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Synthesizing Relation-Aware Entity Transformation by Examples","date":"2018","authors":["Jiarong Wu","Yanyan Jiang","Chang Xu","Shing-Chi Cheung","Xiaoxing Ma","Jian Lu"],"venue":"40th International Conference on Software Engineering (ICSE 2018 Poster)","venueShort":"ICSE Poster","tags":[],"abstract":"\n Recently, programming by examples (PBE) technique achieves a great success in processing and transforming data entities, yet existing approaches generally fall short on the tasks concerning entity relations. 
This paper presents ENTER, a domain-agnostic language for relation-aware entity transformation synthesis. It leverages the combination of two basic relations, the equivalence relation and the total order relation, to succinctly express complex entity relations. ENTER can be instantiated with domain-specific elements to solve a wide range of entity transformation tasks.\n ","paperUrl":"https://cs.nju.edu.cn/changxu/1_publications/ICSE18.pdf","bibtex":"@inproceedings{wu_synthesizing_2018,\n author = {Jiarong Wu and Yanyan Jiang and Chang Xu and S. C. Cheung and Xiaoxing Ma and Jian Lu},\n title = {Synthesizing relation-aware entity transformation by examples},\n booktitle = {Proceedings of the 40th International Conference on Software Engineering (ICSE Poster Track)},\n pages = {to appear},\n year = {2018},\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"AATT+: Effectively Manifesting Concurrency Bugs in Android Apps","date":"2018","authors":["Jue Wang","Yanyan Jiang","Chang Xu","Qiwei Li","Tianxiao Gu","Jun Ma","Xiaoxing Ma","Jian Lu"],"venue":"Science of Computer Programming (SCP)","venueShort":"SCP","tags":[],"abstract":"\n Smartphones are indispensable in people’s daily activities, and smartphone apps tend to be increasingly concurrent due to the wide use of multi-core devices and technologies. Due to this tendency, developers are increasingly unable to tackle the complexity of concurrent apps and to avoid subtle concurrency bugs. To better address this issue, we propose a novel approach to detecting concurrency bugs in Android apps based on the fact that one can generate simultaneous input events and their schedules for an app, which would easily trigger concurrency bugs in an app. We conduct systematic state space exploration to find potentially conflicting resource accesses in an Android app. The app is then automatically pressure-tested by guided event and schedule generation. 
We implemented our prototype tool named AATT+ and evaluated it with two sets of real-world Android apps. Benchmarking using 15 Android apps with previously known concurrency bugs, AATT+ and existing concurrency-unaware techniques detected 10 and 1 bugs, respectively. Evaluated with another set of 17 popular Android apps, AATT+ detected 11 concurrency bugs and 7 of them were previously unknown, achieving an over 80% higher detection rate than existing concurrency-unaware techniques.\n ","paperUrl":"https://cs.nju.edu.cn/changxu/1_publications/SCP18.pdf","projectUrl":"https://github.com/skull591/AATT","bibtex":"@inproceedings{wang_aatt_2018,\n author = {Jue Wang and Yanyan Jiang and Chang Xu and Qiwei Li and Tianxiao Gu and Jun Ma and Xiaoxing Ma and Jian Lu},\n title = {AATT+: Effectively manifesting concurrency bugs in Android apps},\n journal = {Science of Computer Programming (SCP)},\n year = {2018},\n volume = {163},\n pages = {1--18},\n url = {https://doi.org/10.1016/j.scico.2018.03.008},\n code = {https://github.com/skull591/AATT},\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Hybrid CPU-GPU Constraint Checking: Towards Efficient Context Consistency","date":"2016","authors":["Jun Sui","Chang Xu","Shing-Chi Cheung","Wang Xi","Yanyan Jiang","Chun Cao","Xiaoxing Ma","Jian Lu"],"venue":"Information and Software Technology (IST) 2016","venueShort":"IST","tags":[],"abstract":"\n Context: modern software increasingly relies on contexts about computing environments to provide adaptive and smart services. Such contexts, captured and derived from environments of uncontrollable noises, can be inaccurate, incomplete or even in conflict with each other. This is known as the context inconsistency problem, and should be addressed by checking contexts in time to prevent abnormal behavior to applications. 
One popular way is to check application contexts against consistency constraints before their uses, but this can bring heavy computation due to tremendous amount of contexts in changing environments. Existing efforts improve the checking performance by incremental or concurrent computation, but they rely on CPU computing only and can consume valuable CPU capabilities that should otherwise be used by applications themselves.\n\nObjective: in this article, we propose GAIN, a GPU-supported technique to checking consistency constraints systematically and efficiently.\n\nMethod: GAIN can automatically recognize a constraint’s parallel units and associate these units and their runtime instances with matched contexts under checking. GAIN coordinates CPU and GPU and utilizes their capabilities for task preparation and context checking, respectively.\n\nResult: we evaluate GAIN experimentally with millions of real-life context data. The evaluation results show that GAIN can work at least 2–7 × faster and requires much less CPU usage than CPU-based techniques. 
Besides, GAIN can also work stably for different and varying workloads.\n\nConclusion: our experience with GAIN suggests its high efficiency in constraint checking for context consistency as well as its wide applicability to different application workloads.\n ","paperUrl":"http://www.sciencedirect.com/science/article/pii/S095058491500169X","bibtex":"@article{Sui_IST2016,\n author = {Jun Sui and\n Chang Xu and\n Shing{-}Chi Cheung and\n Wang Xi and\n Yanyan Jiang and\n Chun Cao and\n Xiaoxing Ma and\n Jian Lu},\n title = {Hybrid {CPU-GPU} constraint checking: Towards efficient context consistency},\n journal = {Information {&} Software Technology},\n volume = {74},\n pages = {230--242},\n year = {2016},\n url = {http://dx.doi.org/10.1016/j.infsof.2015.10.003},\n doi = {10.1016/j.infsof.2015.10.003},\n timestamp = {Mon, 25 Apr 2016 19:47:34 +0200},\n biburl = {http://dblp.uni-trier.de/rec/bib/journals/infsof/SuiXCX0CML16},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Taming Android Fragmentation: Characterizing and Detecting Compatibility Issues for Android Apps","date":"2016","authors":["Lili Wei","Yepang Liu","Shing-Chi Cheung"],"venue":"31st IEEE/ACM International Conference on Automated Software Engineering (ASE 2016), Singapore, Sept 2016","venueShort":"ASE","tags":["Android","Empirical Study","Fault Detection"],"awards":["Distinguished Paper"],"abstract":"\n Android ecosystem is heavily fragmented. The numerous combinations of different device models and operating system versions make it impossible for Android app developers to exhaustively test their apps. As a result, various compatibility issues arise, causing poor user experience. However, little is known on the characteristics of such fragmentation-induced compatibility issues and no mature tools exist to help developers quickly diagnose and fix these issues. 
To bridge the gap, we conducted an empirical study on 191 real-world compatibility issues collected from popular open-source Android apps. Our study characterized the symptoms and root causes of compatibility issues, and disclosed that the patches of these issues exhibit common patterns. With these findings, we propose a technique named FicFinder to automatically detect compatibility issues in Android apps. FicFinder performs static code analysis based on a model that captures Android APIs as well as their associated context by which compatibility issues are triggered. FicFinder reports actionable debugging information to developers when it detects potential issues. We evaluated FicFinder with 27 large-scale open-source Android apps. The results show that FicFinder can precisely detect compatibility issues in these apps and uncover previously-unknown issues.\n ","paperUrl":"http://sccpu2.cse.ust.hk/ficfinder/ASE_FicFinder.pdf","projectUrl":"http://sccpu2.cse.ust.hk/ficfinder/index.html","bibtex":"@inproceedings{Wei_ASE16,\n\tauthor = {Lili Wei and Yepang Liu and\n\t\t \t Shing{-}Chi Cheung},\n\ttitle = {Taming Android Fragmentation: Characterizing and Detecting Compatibility Issues for Android Apps},\n\tbooktitle = {Proceedings of the 31st IEEE/ACM International Conference on Automated Software Engineering, {ASE} 2016},\n\tyear = {2016}\n}","arxivUrl":null,"slidesUrl":null},{"title":"OASIS: Prioritizing Static Analysis Warnings for Android Apps Based on App User Reviews","date":"2017","authors":["Lili Wei","Yepang Liu","Shing-Chi Cheung"],"venue":"11th joint meeting of the European Software Engineering Conference and the ACM SIGSOFT Symposium on the Foundations of Software Engineering (ESEC/FSE 2017), Paderborn, Germany, Sept 2017","venueShort":"ESEC/FSE","tags":[],"abstract":"\n Lint is a widely-used static analyzer for detecting bugs/issues in Android apps. However, it can generate many false warnings. 
One existing solution to this problem is to leverage project history data (e.g., bug fixing statistics) for warning prioritization. Unfortunately, such techniques are biased toward a project’s archived warnings and can easily miss new issues. Another weakness is that developers cannot readily relate the warnings to the impacts perceivable by users. To overcome these weaknesses, in this paper, we propose a semantics-aware approach, OASIS, to prioritizing Lint warnings by leveraging app user reviews. OASIS combines program analysis and NLP techniques to recover the intrinsic links between the Lint warnings for a given app and the user complaints on the app problems caused by the issues of concern. OASIS leverages the strength of such links to prioritize warnings. We evaluated OASIS on six popular and large-scale open-source Android apps. The results show that OASIS can effectively prioritize Lint warnings and help identify new issues that are previously-unknown to app developers.\n ","paperUrl":"materials/OASIS_author_copy.pdf","bibtex":"@inproceedings{Wei_FSE17,\n\tauthor = {Lili Wei and Yepang Liu and\n\t\t \t Shing{-}Chi Cheung},\n\ttitle = {OASIS: Prioritizing Static Analysis Warnings for Android Apps Based on App User Reviews},\n\tbooktitle = {joint meeting of the European Software Engineering Conference and the ACM SIGSOFT Symposium on the Foundations of Software Engineering, {ESEC/FSE} 2017},\n\tyear = {2017}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Understanding and Detecting Fragmentation-Induced Compatibility Issues for Android Apps","date":"2020","authors":["Lili Wei","Yepang Liu","Shing-Chi Cheung","Huaxun Huang","Xuan Lu","Xuanzhe Liu"],"venue":"IEEE Transactions on Software Engineering 2020","venueShort":"TSE","tags":["Android","Bug Detection"],"abstract":"\n Android ecosystem is heavily fragmented. 
The numerous combinations of different device models and operating system versions make it impossible for Android app developers to exhaustively test their apps, and thus various compatibility issues arise. Unfortunately, little is known on the characteristics of such fragmentation-induced compatibility issues. No mature tools exist to help developers quickly diagnose and fix these issues. To bridge the gap, we conducted an empirical study on 220 real-world compatibility issues collected from five popular open-source Android apps. We further interviewed Android practitioners and conducted an online survey to gain insights from real practices. Via the studies, we characterized compatibility issues, investigated common practices to handle compatibility issues, and disclosed that these issues exhibit common patterns. With these findings, we propose a technique, FicFinder, to automatically detect compatibility issues in Android apps. FicFinder performs static code analysis based on a model that captures Android APIs as well as their associated context by which compatibility issues can be triggered. FicFinder reports actionable debugging information to developers when it detects potential issues. We evaluated FicFinder with 53 large-scale open-source Android apps. The results show that FicFinder can precisely detect compatibility issues in these apps and uncover previously-unknown issues.\n ","paperUrl":"materials/TSE19-lili.pdf","bibtex":"@ARTICLE{DBLP:journals/tse/WeiLCHLL20,\n author = {Lili Wei and\n Yepang Liu and\n Shing{-}Chi Cheung and\n Huaxun Huang and\n Xuan Lu and\n Xuanzhe Liu},\n title = {Understanding and Detecting Fragmentation-Induced Compatibility Issues\n for Android Apps},\n journal = {{IEEE} Trans. 
Software Eng.},\n volume = {46},\n number = {11},\n pages = {1176--1199},\n year = {2020},\n url = {https://doi.org/10.1109/TSE.2018.2876439},\n doi = {10.1109/TSE.2018.2876439},\n timestamp = {Thu, 31 Dec 2020 01:35:38 +0100},\n biburl = {https://dblp.org/rec/journals/tse/WeiLCHLL20.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"PIVOT: Learning API-Device Correlations to Facilitate Android Compatibility Issue Detection","date":"2019","authors":["Lili Wei","Yepang Liu","Shing-Chi Cheung"],"venue":"International Conference on Software Engineering 2019, Technical Research Paper, Montréal, QC, Canada, 25 May - 31 May","venueShort":"ICSE","tags":[],"awards":["Distinguished Artifact"],"abstract":"\n The heavily fragmented Android ecosystem has induced various compatibility issues in Android apps. The search space for such fragmentation-induced compatibility issues (FIC issues) is huge, comprising three dimensions: device models, Android OS versions, and Android APIs. FIC issues, especially those arising from device models, evolve quickly with the frequent release of new device models to the market. As a result, an automated technique is desired to maintain timely knowledge of such FIC issues, which are mostly undocumented. In this paper, we propose such a technique, PIVOT, that automatically learns API-device correlations of FIC issues from existing Android apps. PIVOT extracts and prioritizes API-device correlations from a given corpus of Android apps. We evaluated PIVOT with popular Android apps on Google Play. Evaluation results show that PIVOT can effectively prioritize valid API-device correlations for app corpora collected at different time. 
Leveraging the knowledge in the learned API-device correlations, we further conducted a case study and successfully uncovered ten previously-undetected FIC issues in open-source Android apps.\n ","paperUrl":"materials/ICSE19-lili.pdf","projectUrl":"https://ficissuepivot.github.io/Pivot/","bibtex":"@inproceedings {ICSE19Wei,\n title = {{PIVOT: Learning API-Device Correlations to Facilitate Android Compatibility Issue Detection}},\n author = {Lili Wei and Yepang Liu and Shing-Chi Cheung},\n booktitle = {{Proceedings of the 41th International Conference on Software Engineering}, {ICSE 2019}},\n year = {2019},\n pages = {11}\n}","arxivUrl":null,"slidesUrl":null},{"title":"Which Generated Test Failures Are Fault Revealing? Prioritizing Failures Based on Inferred Precondition Violations using PAF","date":"2018","authors":["Mijung Kim","Shing-Chi Cheung","Sunghun Kim"],"venue":"The ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering, Technical Research Paper, Lake Buena Vista, Florida, 4 Nov - 9 Nov 2018","venueShort":"ESEC/FSE","tags":[],"abstract":"\n Automated unit testing tools, such as Randoop, have been developed to produce failing tests as means of finding faults. However, these tools often produce false alarms, so are not widely used in practice. The main reason for a false alarm is that the generated failing test violates an implicit precondition of the method under test, such as a field should not be null at the entry of the method. This condition is not explicitly programmed or documented but implicitly assumed\n\t\t\t\tby developers. To address this limitation, we propose a technique called Paf to cluster generated test failures due to the same cause and reorder them based on their likelihood of violating an implicit precondition of the method under test. From various test executions, Paf observes their dataflows to the variables whose values are used when the program fails. 
Based on the dataflow similarity and where these values are originated, Paf clusters failures and determines\n\t\t\t\ttheir likelihood of being fault revealing. We integrated Paf into Randoop. Our empirical results on open-source projects show that Paf effectively clusters fault revealing tests arising from the same\n\t\t\t\tfault and successfully prioritizes the fault-revealing ones.\n ","paperUrl":"materials/fse18-mijung.pdf","bibtex":"@inproceedings{kim2018paf,\n title={Which Generated Test Failures Are Fault Revealing? Prioritizing\nFailures Based on Inferred Precondition Violations using PAF},\n author={Kim, Mijung and Cheung, Shing-Chi and Kim, Sunghun},\n booktitle={Proceedings of the 2018 26th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE 2018)},\n pages={1--12},\n year={2018},\n organization={ACM}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Locus: Locating Bugs from Software Changes","date":"2016","authors":["Ming Wen","Rongxin Wu","Shing-Chi Cheung"],"venue":"31st IEEE/ACM International Conference on Automated Software Engineering (ASE 2016), Singapore, Sept 2016","venueShort":"ASE","tags":[],"abstract":"\n Various information retrieval (IR) based techniques have been proposed recently to locate bugs automatically at the file level. However, their usefulness is often compromised by the coarse granularity of files and the lack of contextual information. To address this, we propose to locate bugs using software changes, which offer finer granularity than files and provide important contextual clues for bug-fixing. We observe that bug inducing changes can facilitate the bug fixing process. For example, it helps triage the bug fixing task to the developers who committed the bug inducing changes or enables developers to fix bugs by reverting these changes. 
Our study further identifies that change logs and the naturally small granularity of changes can help boost the performance of IR-based bug localization. Motivated by these observations, we propose an IR-based approach Locus to locate bugs from software changes, and evaluate it on six large open source projects. The results show that Locus outperforms existing techniques at the source file level localization significantly. MAP and MRR in particular have been improved, on average, by 20.1% and 20.5%, respectively. Locus is also capable of locating the inducing changes within top 5 for 41.0% of the bugs. The results show that Locus can significantly reduce the number of lines needing to be scanned to locate the bug compared with existing techniques.\n ","paperUrl":"http://home.cse.ust.hk/~mwenaa/paper/ASE16-Locus.pdf","projectUrl":"http://www.cse.ust.hk/~mwenaa/Locus.html","bibtex":"@inproceedings{Wei_ASE16,\n\tauthor = {Ming Wen and Rongxin Wu and\n\t\t \t Shing{-}Chi Cheung},\n\ttitle = {Locus: Locating Bugs from Software Changes},\n\tbooktitle = {Proceedings of the 31st IEEE/ACM International Conference on Automated Software Engineering, {ASE} 2016},\n\tyear = {2016}\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Context-Aware Patch Generation for Better Automated Program Repair","date":"2018","authors":["Ming Wen","Junjie Chen","Rongxin Wu","Dan Hao","Shing-Chi Cheung"],"venue":"International Conference on Software Engineering, Technical Research Paper, Gothenburg, Sweden, May 27 - 3 June 2018","venueShort":"ICSE","tags":[],"abstract":"\n The effectiveness of search-based automated program repair is limited in the number of correct patches that can be successfully generated.\nThere are two causes of such limitation. \nFirst, the search space does not contain the correct patch. 
\nSecond, the search space is huge and therefore the correct patch cannot be generated (ie correct patches are either generated after incorrect plausible ones or not generated within the time budget).\n\nTo increase the likelihood of including the correct patches in the search space, we propose to work at a fine granularity in terms of AST nodes.\nThis, however, will further enlarge the search space, increasing the challenge to find the correct patches.\nWe address the challenge by devising a strategy to prioritize the candidate patches based on their likelihood of being correct.\nSpecifically, we study the use of AST nodes' context information to estimate the likelihood.\n\nIn this paper, we propose CapGen, a context-aware patch generation technique.\nThe novelty which allows CapGen to produce more correct patches lies in three aspects:\n(1) The fine-granularity design enables it to find more correct fixing ingredients;\n(2) The context-aware prioritization of mutation operators enables it to constrain the search space;\n(3) Three context-aware models enable it to rank correct patches at high positions before incorrect plausible ones.\nWe evaluate CapGen on Defects4J and compare it with the state-of-the-art program repair techniques.\nOur evaluation shows that CapGen outperforms and complements existing techniques.\nCapGen achieves a high precision of 84.00% and can prioritize the correct patches before 98.78% of the incorrect plausible ones.\n ","paperUrl":"materials/Repair.pdf","bibtex":"@inproceedings {ICSE18,\n title = {{Context-Aware Patch Generation for Better Automated Program Repair}},\n author = {Ming, Wen and Junjie, Chen and Rongxin, Wu and Dan, Hao and Shing-Chi, Cheung},\n booktitle = {{Proceedings of the 40th International Conference on Software Engineering}},\n series = {ICSE 2016},\n year = {2018},\n doi = {10.1145/3180155.3180233},\n url = 
{http://home.cse.ust.hk/~mwenaa/paper/Repair.pdf},\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"How Well Do Change Sequences Predict Defects? Sequence Learning from Software Changes","date":"2020","authors":["Ming Wen","Rongxin Wu","Shing-Chi Cheung"],"venue":"IEEE Transactions on Software Engineering 2020","venueShort":"TSE","tags":["Software Analytics","Defect Prediction"],"abstract":"\n Software defect prediction, which aims to identify defective modules, can assist developers in finding bugs and prioritizing limited quality assurance resources. Various features to build defect prediction models have been proposed and evaluated. Among them, process metrics are one important category. Yet, existing process metrics are mainly encoded manually from change histories and ignore the sequential information arising from the changes during software evolution. Unlike traditional process metrics used for existing defect prediction models, change sequences are mostly vectors of variable length. This makes it difficult to apply such sequences directly in prediction models that are driven by conventional classifiers. To resolve this challenge, we utilize Recurrent Neural Network (RNN), which is a deep learning technique, to encode features from sequence data automatically. In this paper, we propose a novel approach called Fences, which extracts six types of change sequences covering different aspects of software changes via fine-grained change analysis. It approaches defects prediction by mapping it to a sequence labeling problem solvable by RNN. Our evaluations on 10 open source projects show that Fences can predict defects with high performance. 
Fences also outperforms the state-of-the-art technique which learns semantic features automatically from static code via deep learning.\n ","paperUrl":"materials/TSE19-ming.pdf","bibtex":"@article{DBLP:journals/tse/WenWC20,\n author = {Ming Wen and\n Rongxin Wu and\n Shing{-}Chi Cheung},\n title = {How Well Do Change Sequences Predict Defects? Sequence Learning from\n Software Changes},\n journal = {{IEEE} Trans. Software Eng.},\n volume = {46},\n number = {11},\n pages = {1155--1175},\n year = {2020},\n url = {https://doi.org/10.1109/TSE.2018.2876256},\n doi = {10.1109/TSE.2018.2876256},\n timestamp = {Tue, 02 Feb 2021 18:29:15 +0100},\n biburl = {https://dblp.org/rec/journals/tse/WenWC20.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Exploring and Exploiting the Correlations between Bug-Inducing and Bug-Fixing Commits","date":"2019","authors":["Ming Wen","Rongxin Wu","Yepang Liu","Yongqiang Tian","Xuan Xie","Shing-Chi Cheung","Zhendong Su"],"venue":"The ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering 2019, Technical Research Paper, Tallinn, Estonia","venueShort":"ESEC/FSE","tags":[],"abstract":"\n Bug-inducing commits provide important information to understand when and how bugs were introduced.\n\t\t\t\tTherefore, they have been extensively investigated by existing studies and frequently leveraged to facilitate bug fixings in industrial practices.\n\n\t\t\t\tDue to the importance of bug-inducing commits in software debugging,\n\t\t\t\twe are motivated to conduct the first systematic empirical study to explore the correlations between bug-inducing and bug-fixing commits in terms of code elements and modifications.\n\t\t\t\tTo facilitate the study, we collected the inducing and fixing commits for 333 bugs from seven large open-source projects.\n\t\t\t\tThe empirical findings reveal important and 
significant correlations between a bug's inducing and fixing commits.\n\t\t\t\tWe further exploit the usefulness of such correlation findings from two aspects.\n\t\t\t\tFirst, they explain why the SZZ algorithm, the most widely-adopted approach to collecting bug-inducing commits, is imprecise.\n\t\t\t\tIn view of SZZ's imprecision, we revisited the findings of previous studies based on SZZ,\n\t\t\t\tand found that 8 out of 10 previous findings are significantly affected by SZZ's imprecision.\n\t\t\t\tSecond, they shed light on the design of automated debugging techniques.\n\t\t\t\tFor demonstration, we designed approaches that exploit the correlations with respect to statements and change actions.\n\t\t\t\tOur experiments on Defects4J show that our approaches can boost the performance of fault localization significantly and also advance existing APR techniques.\n ","paperUrl":"materials/FSE19-ming.pdf","projectUrl":"https://github.com/justinwm/InduceBenchmark","bibtex":"@inproceedings{wen2019exploring,\n title={Exploring and Exploiting the Correlations between Bug-Inducing and Bug-Fixing Commits.},\n author={Wen, Ming and Wu, Rongxin and Liu, Yepang and Tian, Yongqiang and Xie, Xuan and Cheung, Shing-Chi and Su, Zhendong},\n booktitle={Proceedings of the 2019 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering},\n to appear,\n year={2019},\n organization={ACM}\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Exposing Library API Misuses via Mutation Analysis","date":"2019","authors":["Ming Wen","Yepang Liu","Rongxin Wu","Xuan Xie","Shing-Chi Cheung","Zhendong Su"],"venue":"International Conference on Software Engineering 2019, Technical Research Paper, Montréal, QC, Canada, 25 May - 31 May","venueShort":"ICSE","tags":[],"abstract":"\n Misuses of library APIs are pervasive and often lead to software crashes and vulnerability issues. 
Various static analysis tools have been proposed to detect library API misuses. They often involve mining frequent patterns from a large number of correct API usage examples, which can be hard to obtain in practice. They also suffer from low precision due to an over-simplified assumption that a deviation from frequent usage patterns indicates a misuse.\n\t\t\t\tWe make two observations on the discovery of API misuse patterns. First, API misuses can be represented as mutants of the corresponding correct usages. Second, whether a mutant will introduce a misuse can be validated via executing it against a test suite and analyzing the execution information. Based on these observations, we propose MUTAPI, the first approach to discovering API misuse patterns via mutation analysis. To effectively mimic API misuses based on correct usages, we first design eight effective mutation operators inspired by the common characteristics of API misuses. MUTAPI generates mutants by applying these mutation operators on a set of client projects and collects mutant-killing tests as well as the associated stack traces. Misuse patterns are discovered from the killed mutants that are prioritized according to their likelihood of causing API misuses based on the collected information. We applied MUTAPI on 16 client projects with respect to 73 popular Java APIs. The results show that MUTAPI is able to discover substantial API misuse patterns with a high precision of 0.78. 
It also achieves a recall of 0.49 on the MUBENCH benchmark, which outperforms the state-of-the-art techniques.\n ","paperUrl":"materials/ICSE19-ming.pdf","bibtex":"@inproceedings {WEN2019API,\n title = {{Exposing Library API Misuses via Mutation Analysis}},\n author = {Ming, Wen and Yepang, Liu and Rongxin, Wu and Xuan, Xie and Shing-Chi, Cheung and Zhendong, Su},\n booktitle = {{Proceedings of the 41st International Conference on Software Engineering}},\n series = {ICSE 2019},\n year = {2019},\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Historical Spectrum based Fault Localization","date":"2021","authors":["Ming Wen","Junjie Chen","Yongqiang Tian","Rongxin Wu","Dan Hao","Shi Han","Shing-Chi Cheung"],"venue":"Transactions of Software Engineering 2021","venueShort":"TSE","tags":[],"abstract":"\n Spectrum-based fault localization (SBFL) techniques are widely studied and have been evaluated to be effective in locating faults. Recent studies also showed that developers from industry value automated SBFL techniques. However, their effectiveness is still limited by two main reasons. First, the test coverage information leveraged to construct the spectrum does not reflect the root cause directly. Second, SBFL suffers from the tie issue so that the buggy code entities can not be well differentiated from non-buggy ones. To address these challenges, we propose to leverage the information of version histories in fault localization based on the following two intuitions. First, version histories record how bugs are introduced to software projects and this information reflects the root cause of bugs directly. Second, the evolution histories of code can help differentiate those suspicious code entities ranked in tie by SBFL. 
Our intuitions are also inspired by the observations on debugging practices from large open source projects and industry.\n\t\t\t\tBased on the intuitions, we propose a novel technique HSFL (historical spectrum based fault localization). Specifically, HSFL identifies bug-inducing commits from the version history in the first step. It then constructs historical spectrum (denoted as Histrum) based on bug-inducing commits, which is another dimension of spectrum orthogonal to the coverage based spectrum used in SBFL. HSFL finally ranks the suspicious code elements based on our proposed Histrum and the conventional spectrum. HSFL outperforms the state-of-the-art SBFL techniques significantly on the Defects4J benchmark. Specifically, it locates and ranks the buggy statement at Top-1 for 77.8% more bugs as compared with SBFL, and 33.9% more bugs at Top-5. Besides, for the metrics MAP and MRR, HSFL achieves an average improvement of 28.3% and 40.8% over all bugs, respectively. Moreover, HSFL can also outperform other six families of fault localization techniques, and our proposed Histrum model can be integrated with different families of techniques and boost their performance.\n ","paperUrl":"materials/TSE20-ming.pdf","projectUrl":"https://github.com/justinwm/HSFL/","bibtex":"@article{WEN_TSE21,\n author = {Ming, Wen and Junjie, Chen and Yongqiang, Tian and Rongxin, Wu and Dan, Hao and Shi, Han and Shing-Chi, Cheung},\n title = {Historical Spectrum based Fault Localization},\n journal = {{IEEE} Trans. 
Software Eng.},\n volume = {47},\n number = {11},\n pages = {2348--2368},\n year = {2021},\n url = {https://doi.org/10.1109/TSE.2019.2948158},\n doi = {10.1109/TSE.2019.2948158}\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"CrashLocator: Locating Crashing Faults based on Crash Stacks","date":"2014","authors":["Rongxin Wu","Hongyu Zhang","Shing-Chi Cheung","Sunghun Kim"],"venue":"International Symposium on Software Testing and Analysis (ISSTA 2014), San Jose, California, USA, July 2014","venueShort":"ISSTA","tags":["Fault Localization"],"awards":["Distinguished Paper"],"abstract":"\n Software crash is common. When a crash occurs, software developers can receive a report upon user permission. A crash report typically includes a call stack at the time of crash. An important step of debugging a crash is to identify faulty functions, which is often a tedious and labor-intensive task. In this paper, we propose CrashLocator, a method to locate faulty functions using the crash stack information in crash reports. It deduces possible crash traces (the failing execution traces that lead to crash) by expanding the crash stack with functions in static call graph. It then calculates the suspiciousness of each function in the approximate crash traces. The functions are then ranked by their suspiciousness scores and are recommended to developers for further investigation. We evaluate our approach using real-world Mozilla crash data. The results show that our approach is effective: we can locate 50.6%, 63.7% and 67.5% of crashing faults by examining top 1, 5 and 10 functions recommended by CrashLocator, respectively. 
Our approach outperforms the conventional stack-only methods significantly.\n ","paperUrl":"http://dl.acm.org/citation.cfm?doid=2610384.2610386","bibtex":"@inproceedings{DBLP:conf/issta/WuZCK14,\n author = {Rongxin Wu and\n Hongyu Zhang and\n Shing{-}Chi Cheung and\n Sunghun Kim},\n title = {CrashLocator: locating crashing faults based on crash stacks},\n booktitle = {International Symposium on Software Testing and Analysis, {ISSTA}\n '14, San Jose, CA, {USA} - July 21 - 26, 2014},\n pages = {204--214},\n year = {2014},\n crossref = {DBLP:conf/issta/2014},\n url = {http://doi.acm.org/10.1145/2610384.2610386},\n doi = {10.1145/2610384.2610386},\n timestamp = {Sun, 13 Jul 2014 13:49:26 +0200},\n biburl = {http://dblp.uni-trier.de/rec/bib/conf/issta/WuZCK14},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null},{"title":"Casper: An Efficient Approach to Call Trace Collection","date":"2016","authors":["Rongxin Wu","Xiao Xiao","Shing-Chi Cheung","Hongyu Zhang","Charles Zhang"],"venue":"43rd ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages (POPL 2016)","venueShort":"POPL","tags":[],"abstract":"\n Call traces, i.e., sequences of function calls and returns, are fundamental to a wide range of program analyses such as bug reproduction, fault diagnosis, performance analysis, and many others. The conventional approach to collect call traces that instruments each function call and return site incurs large space and time overhead. Our approach aims at reducing the recording overheads by instrumenting only a small amount of call sites while keeping the capability of recovering the full trace. We propose a call trace model and a logged call trace model based on an LL(1) grammar, which enables us to define the criteria of a feasible solution to call trace collection. Based on the two models, we prove that to collect call traces with minimal instrumentation is an NP-hard problem. 
We then propose an efficient approach to obtaining a suboptimal solution. We implemented our approach as a tool Casper and evaluated it using the DaCapo benchmark suite. The experiment results show that our approach causes significantly lower runtime (and space) overhead than two state-of-the-art approaches.\n ","paperUrl":"http://home.cse.ust.hk/~wurongxin/files/wurongxin_popl2016.pdf","bibtex":"@inproceedings{Wu_POPL2016,\n author = {Rongxin Wu and\n Xiao Xiao and\n Shing{-}Chi Cheung and\n Hongyu Zhang and\n Charles Zhang},\n title = {Casper: an efficient approach to call trace collection},\n booktitle = {Proceedings of the 43rd Annual {ACM} {SIGPLAN-SIGACT} Symposium on\n Principles of Programming Languages, {POPL} 2016, St. Petersburg,\n FL, USA, January 20 - 22, 2016},\n pages = {678--690},\n year = {2016},\n crossref = {DBLP:conf/popl/2016},\n url = {http://doi.acm.org/10.1145/2837614.2837619},\n doi = {10.1145/2837614.2837619},\n timestamp = {Wed, 09 Mar 2016 08:11:59 +0100},\n biburl = {http://dblp.uni-trier.de/rec/bib/conf/popl/WuXCZZ16},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"ChangeLocator: Locate Crash-Inducing Changes Based on Crash Reports","date":"2018","authors":["Rongxin Wu","Ming Wen","Shing-Chi Cheung","Hongyu Zhang"],"venue":"Journal of Empirical Software Engineering (EmSE 2018)","venueShort":"EmSE","tags":[],"abstract":"\n Software crashes are severe manifestations of software bugs. Debugging crashing bugs is tedious and time-consuming. Understanding software changes that induce a crashing bug can provide useful contextual information for bug fixing and is highly demanded by developers. Locating the bug inducing changes is also useful for automatic program repair, since it narrows down the root causes and reduces the search space of bug fix location. 
However, currently there are no systematic studies on locating the software changes to a source code repository that induce a crashing bug reflected by a bucket of crash reports. To tackle this problem, we first conducted an empirical study on characterizing the bug inducing changes for crashing bugs (denoted as crash-inducing changes). We also propose ChangeLocator, a method to automatically locate crash-inducing changes for a given bucket of crash reports. We base our approach on a learning model that uses features originated from our empirical study and train the model using the data from the historical fixed crashes. We evaluated ChangeLocator with six release versions of Netbeans project. The results show that it can locate the crash-inducing changes for 44.7%, 68.5%, and 74.5% of the bugs by examining only top 1, 5 and 10 changes in the recommended list, respectively. It significantly outperforms the existing state-of-the-art approach.\n ","paperUrl":"materials/ChangeLocator.pdf","bibtex":"@article{wu2018changelocator,\n title={ChangeLocator: locate crash-inducing changes based on crash reports},\n author={Wu, Rongxin and Wen, Ming and Cheung, Shing-Chi and Zhang, Hongyu},\n journal={Empirical Software Engineering},\n volume={23},\n number={5},\n pages={2866--2900},\n year={2018},\n publisher={Springer}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"How Effectively can Spreadsheet Anomalies be Detected: An Empirical Study","date":"2017","authors":["Ruiqing Zhang","Chang Xu","Shing-Chi Cheung","Ping Yu","Xiaoxing Ma","Jian Lu"],"venue":"The Journal of Systems and Software (JSS)","venueShort":"JSS ","tags":[],"abstract":"\n While spreadsheets are widely used, they have been found to be error-prone. Various techniques have been proposed to detect anomalies in spreadsheets, with varying scopes and effectiveness. Nevertheless, there is no empirical study comparing these techniques' practical usefulness and effectiveness. 
In this work, we conducted a large-scale empirical study of three state-of-the-art techniques on their effectiveness in detecting spreadsheet anomalies. Our study focused on the precision, recall rate, efficiency and scope. We found that one technique outperforms the other two in precision and recall rate of spreadsheet anomaly detection. Efficiency of the three techniques is acceptable for most spreadsheets, but they may not be scalable to large spreadsheets with complex formulas. Besides, they have different scopes for detecting different spreadsheet anomalies, thus complementing to each other. We also discussed limitations of these three techniques. Based on our findings, we give suggestions for future spreadsheet research.\n ","paperUrl":"http://cs.nju.edu.cn/_upload/tpl/01/55/341/template341/1_publications/JSS16.pdf","bibtex":"@article{Zhang_JSS17,\n\tauthor = {Ruiqing Zhang, Chang Xu, Shing-Chi Cheung, Ping Yu, Xiaoxing Ma and Jian Lu},\n\ttitle = {How Effective can Spreadsheet Anomalies be Detected: An Empirical Study},\n\tjournal = {The Journal of Systems and Software (JSS)},\n\tyear = {2017}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Automatic Detection and Update Suggestion for Outdated API Names in Documentation","date":"2021","authors":["Seonah Lee","Rongxin Wu","Shing-Chi Cheung","Sungwon Kang"],"venue":"IEEE Transactions on Software Engineering 2021","venueShort":"TSE","tags":["Software Analytics","API Misuse"],"abstract":"\n Application programming interfaces (APIs) continually evolve to meet ever-changing user needs, and documentation provides an authoritative reference for their usage. However, API documentation is commonly outdated because nearly all of the associated updates are performed manually. Such outdated documentation, especially with regard to API names, causes major software development issues. In this paper, we propose a method for automatically updating outdated API names in API documentation. 
Our insight is that API updates in documentation can be derived from API implementation changes between code revisions. To evaluate the proposed method, we applied it to four open source projects. Our evaluation results show that our method, FreshDoc, detects outdated API names in API documentation with 48% higher accuracy than the existing state-of-the-art methods do. Moreover, when we checked the updates suggested by FreshDoc against the developers' manual updates in the revised documentation, FreshDoc addressed 82% of the outdated names. When we reported 40 outdated API names found by FreshDoc via issue tracking systems, developers accepted 75% of the suggestions. These evaluation results indicate that FreshDoc can be used as a practical method for the detection and updating of API names in the associated documentation.\n ","paperUrl":"materials/TSE19-lee.pdf","bibtex":"@article{DBLP:journals/tse/LeeWCK21,\n author = {Seonah Lee and\n Rongxin Wu and\n Shing{-}Chi Cheung and\n Sungwon Kang},\n title = {Automatic Detection and Update Suggestion for Outdated {API} Names\n in Documentation},\n journal = {{IEEE} Trans. Software Eng.},\n volume = {47},\n number = {4},\n pages = {653--675},\n year = {2021},\n url = {https://doi.org/10.1109/TSE.2019.2901459},\n doi = {10.1109/TSE.2019.2901459},\n timestamp = {Thu, 29 Apr 2021 15:14:58 +0200},\n biburl = {https://dblp.org/rec/journals/tse/LeeWCK21.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Automatic Spreadsheet Cell Clustering and Smell Detection Using Strong and Weak Features","date":"2016","authors":["Shing-Chi Cheung","Wanjun Chen","Yepang Liu","Chang Xu"],"venue":"38th International Conference on Software Engineering (ICSE 2016), Austin, TX, USA, May 2016","venueShort":"ICSE","tags":[],"abstract":"\n Various techniques have been proposed to detect smells in spreadsheets, which are susceptible to errors. 
These techniques typically detect spreadsheet smells through a mechanism based on a fixed set of patterns or metric thresholds. Unlike conventional programs, tabulation styles vary greatly across spreadsheets. Smell detection based on fixed patterns or metric thresholds, which are insensitive to the varying tabulation styles, can miss many smells in one spreadsheet while reporting many spurious smells in another. In this paper, we propose CUSTODES to effectively cluster spreadsheet cells and detect smells in these clusters. The clustering mechanism can automatically adapt to the tabulation styles of each spreadsheet using strong and weak features. These strong and weak features capture the invariant and variant parts of tabulation styles, respectively. As smelly cells in a spreadsheet normally occur in minority, they can be mechanically detected as clusters' outliers in feature spaces. We implemented and applied CUSTODES to 70 spreadsheets files randomly sampled from the EUSES corpus. These spreadsheets contain 1,610 formula cell clusters. Experimental results confirmed that CUSTODES is effective. 
It successfully detected harmful smells that can induce computation anomalies in spreadsheets with an F-measure of 0.72, outperforming state-of-the-art techniques.\n ","paperUrl":"http://doi.acm.org/10.1145/2884781.2884796","projectUrl":"http://sccpu2.cse.ust.hk/custodes/","slidesUrl":"http://sccpu2.cse.ust.hk/castle/materials/Custodes.4.pdf","bibtex":"@inproceedings{Cheung_ICSE2016,\n author = {Shing{-}Chi Cheung and\n Wanjun Chen and\n Yepang Liu and\n Chang Xu},\n title = {{CUSTODES:} automatic spreadsheet cell clustering and smell detection\n using strong and weak features},\n booktitle = {Proceedings of the 38th International Conference on Software Engineering,\n {ICSE} 2016, Austin, TX, USA, May 14-22, 2016},\n pages = {464--475},\n year = {2016},\n crossref = {DBLP:conf/icse/2016},\n url = {http://doi.acm.org/10.1145/2884781.2884796},\n doi = {10.1145/2884781.2884796},\n timestamp = {Sun, 15 May 2016 11:55:22 +0200},\n biburl = {http://dblp.uni-trier.de/rec/bib/conf/icse/CheungCLX16},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"awards":[]},{"title":"Automating Object Transformations for Dynamic Software Updating via Online Execution Synthesis","date":"2018","authors":["Tianxiao Gu","Xiaoxing Ma","Chang Xu","Yanyan Jiang","Chun Cao","Jian Lu"],"venue":"32nd European Conference on Object-Oriented Programming (ECOOP 2018), Article 19","venueShort":"ECOOP","tags":[],"abstract":"\n Dynamic software updating (DSU) is a technique to upgrade a running software system on the fly without stopping the system. During updating, the runtime state of the modified components of the system needs to be properly transformed into a new state, so that the modified components can still correctly interact with the rest of the system. However, the transformation is non-trivial to realize due to the gap between the low-level implementations of two versions of a program. 
This paper presents AOTES, a novel approach to automating object transformations for dynamic updating of Java programs. AOTES bridges the gap by abstracting the old state of an object to a history of method invocations, and re-invoking the new version of all methods in the history to get the desired new state. AOTES requires no instrumentation to record any data and thus has no overhead during normal execution. We propose and implement a novel technique that can synthesize an equivalent history of method invocations based on the current object state only. We evaluated AOTES on software updates taken from Apache Commons Collections, Tomcat, FTP Server and SSHD Server. Experimental results show that AOTES successfully handled 51 of 61 object transformations of 21 updated classes, while two state-of-the-art approaches only handled 11 and 6 of 61, respectively.\n ","paperUrl":"https://cs.nju.edu.cn/changxu/1_publications/ECOOP18.pdf","bibtex":"@inproceedings{gu_automating_2018,\n author = {Tianxiao Gu and Xiaoxing Ma and Chang Xu and Yanyan Jiang and Chun Cao and Jian Lu},\n title = {Automating object transformations for dynamic software updating via online execution synthesis},\n pages = {to appear},\n year = {2018},\n booktitle = {Proceedings of the 32nd European Conference on Object-Oriented Programming (ECOOP)},\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"RECONTEST: Effective Regression Testing of Concurrent Programs","date":"2015","authors":["Valerio Terragni","Shing-Chi Cheung","Charles Zhang"],"venue":"37th International Conference on Software Engineering (ICSE 2015), Florence, Italy, May 16-24, 2015","venueShort":"ICSE","tags":[],"abstract":"\n Concurrent programs proliferate as multi-core technologies advance. As a result, the conventional approach that selects a sub-set of test cases for regression testing without considering interleavings is insufficient. 
In this paper we present RECONTEST to address the problem by selecting the new interleavings that arise due to code changes. These interleavings must be explored in order to uncover regression bugs. RECONTEST efficiently selects new interleavings by first identifying shared memory accesses that are affected by the changes, and then exploring only those problematic interleavings that contain at least one of these accesses. We have implemented RECONTEST as an automated tool and evaluated it using 13 real-world concurrent program subjects. Our results show that RECONTEST can significantly reduce the regression testing cost without missing any faulty interleavings induced by code changes.\n ","paperUrl":"http://home.cse.ust.hk/~vterragni/files/Terragni_ICSE2015.pdf","bibtex":"@inproceedings{TERRAGNI_ICSE15,\n author = {Valerio Terragni and\n Shing{-}Chi Cheung and\n Charles Zhang},\n title = {{RECONTEST:} Effective Regression Testing of Concurrent Programs},\n booktitle = {37th {IEEE/ACM} International Conference on Software Engineering,\n {ICSE} 2015, Florence, Italy, May 16-24, 2015, Volume 1},\n pages = {246--256},\n year = {2015},\n url = {http://dx.doi.org/10.1109/ICSE.2015.45},\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"CSNIPPEX: Automated Synthesis of Compilable Code Snippets from Q&A Sites","date":"2016","authors":["Valerio Terragni","Yepang Liu","Shing-Chi Cheung"],"venue":"Proceedings of the 25th International Symposium on Software Testing and Analysis (ISSTA 2016), Saarbrücken, Germany, July 2016","venueShort":"ISSTA","tags":[],"abstract":"\n Popular Q&A sites like StackOverflow have collected numerous code snippets. However, many of them do not have complete type information, making them uncompilable and inapplicable to various software engineering tasks. 
This paper analyzes this problem, and proposes a technique CSNIPPEX to automatically convert code snippets into compilable Java source code files by resolving external dependencies, generating import declarations, and fixing syntactic errors. We implemented CSNIPPEX as a plug-in for Eclipse and evaluated it with 242,175 StackOverflow posts that contain code snippets. CSNIPPEX successfully synthesized compilable Java files for 40,410 of them. It was also able to effectively recover import declarations for each post with a precision of 91.04% in a couple of seconds.\n ","paperUrl":"http://www.cse.ust.hk/~vterragni/files/Terragni_ISSTA2016.pdf","bibtex":"@inproceedings{Terragni_ISSTA16,\n author = {Valerio Terragni and Yepang Liu and\n Shing{-}Chi Cheung},\n title = {CSNIPPEX: Automated Synthesis of Compilable Code Snippets from Q&A Sites},\n booktitle = {Proceedings of the 2016 International Symposium on Software Testing\n and Analysis, {ISSTA} 2016},\n pages = {118--129},\n year = {2016},\n url = {http://dx.doi.org/10.1145/2931037.2931058}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Coverage-Driven Test Code Generation for Concurrent Classes","date":"2016","authors":["Valerio Terragni","Shing-Chi Cheung"],"venue":"38th International Conference on Software Engineering (ICSE 2016), Austin, TX, USA, May 2016","venueShort":"ICSE","tags":[],"abstract":"\n Previous techniques on concurrency testing have mainly focused on exploring the interleaving space of manually written test code to expose faulty interleavings of shared memory accesses. These techniques assume the availability of failure-inducing tests. In this paper, we present AutoConTest, a coverage-driven approach to generate effective concurrent test code that achieve high interleaving coverage. AutoConTest consists of three components. 
First, it computes the coverage requirements dynamically and iteratively during sequential test code generation, using a coverage metric that captures the execution context of shared memory accesses. Second, it smartly selects these sequential codes based on the computed result and assembles them for concurrent tests, achieving increased context-sensitive interleaving coverage. Third, it explores the newly covered interleavings. We have implemented AutoConTest as an automated tool and evaluated it using 6 real-world concurrent Java subjects. The results show that AutoConTest is able to generate effective concurrent tests that achieve high interleaving coverage and expose concurrency faults quickly. AutoConTest took less than 65 seconds (including program analysis, test generation and execution) to expose the faults in the program subjects.\n ","paperUrl":"http://www.cse.ust.hk/~vterragni/files/Terragni_ICSE2016.pdf","bibtex":"@inproceedings{Terragni_ICSE16,\n author = {Valerio Terragni and\n Shing{-}Chi Cheung},\n title = {Coverage-driven test code generation for concurrent classes},\n booktitle = {Proceedings of the 38th International Conference on Software Engineering,\n {ICSE} 2016, Austin, TX, USA, May 14-22, 2016},\n pages = {1121--1132},\n year = {2016},\n url = {http://doi.acm.org/10.1145/2884781.2884876}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"File-level socio-technical congruence and its relationship with bug proneness in OSS projects","date":"2019","authors":["Weiqiang Zhang","Shing-Chi Cheung","Zhenyu Chen","Yuming Zhou","Bin Luo"],"venue":"Journal of Systems and Software 156: 21-40 (2019)","venueShort":"JSS","tags":[],"abstract":"\n Coordination is important in software development. Socio-Technical Congruence (STC) is proposed to measure the match between coordination requirements and actual coordination activities. The previous work of Cataldo et al. 
computes STC in commercial projects and finds it related to software failures. In this paper, we study the relationship between file-level STC and bug proneness in Open Source Software (OSS) projects. We apply the fundamental STC framework to the OSS data setting and present a method of computing file-level STC based on our available data. We also propose a derivative STC metric called Missing Developer Links (MDL), which is to measure the amount of coordination breakdowns. In our empirical analysis on five OSS projects, we find that MDL is more related to bug proneness than STC. Furthermore, STC or MDL can be computed based on different types of file networks and developer networks, and we find out the best file network and the best developer network via an empirical study. We also evaluate the usefulness of STC or MDL metrics in bug prediction. This work is promising to help detect coordination issues in OSS projects.\n ","paperUrl":"https://www.sciencedirect.com/science/article/pii/S0164121219301177","bibtex":"@article{DBLP:journals/jss/ZhangCCZL19,\n author = {Weiqiang Zhang and\n Shing{-}Chi Cheung and\n Zhenyu Chen and\n Yuming Zhou and\n Bin Luo},\n title = {File-level socio-technical congruence and its relationship with bug\n proneness in {OSS} projects},\n journal = {Journal of Systems and Software},\n volume = {156},\n pages = {21--40},\n year = {2019},\n url = {https://doi.org/10.1016/j.jss.2019.05.030},\n doi = {10.1016/j.jss.2019.05.030},\n timestamp = {Thu, 05 Sep 2019 19:41:26 +0200},\n biburl = {https://dblp.org/rec/bib/journals/jss/ZhangCCZL19},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"A Survey on Dependability Improvement Techniques for Pervasive Computing Systems","date":"2015","authors":["Wenhua Yang","Yepang Liu","Chang Xu","Shing-Chi Cheung"],"venue":"SCIENCE CHINA Information Sciences (SCIS) 58(5), May 
2015","venueShort":"SCIS","tags":[],"abstract":"\n The goal of this survey is to summarize the state-of-the-art research results and identify research challenges of developing and deploying dependable pervasive computing systems. We discuss the factors that affect the system dependability and the studies conducted to improve it with respect to these factors. These studies were categorized according to their similarities and differences in hope of shedding some insight into future research. There are three categories: context management, fault detection, and uncertainty handling. These three categories of work address the three most difficult problems of pervasive computing systems. First, pervasive computing systems’ perceived environments, which are also called their contexts, can vary intensively, and thus have a great impact on the systems’ dependability. Second, it is challenging to guarantee the correctness of the systems’ internal computations integrated with interactions with external environments for developers. Fault detection is then an important issue for improving dependability for these systems. Last but not least importantly, pervasive computing systems interact with their environments frequently. These interactions can be affected by many uncertainties, which can jeopardize the systems’ dependability. 
After a discussion of these pieces of work, we present an outlook for its future research directions.\n ","paperUrl":"http://link.springer.com/article/10.1007%2Fs11432-015-5300-3","bibtex":"@article{DBLP:journals/chinaf/YangLXC15,\n author = {Wenhua Yang and\n Yepang Liu and\n Chang Xu and\n Shing{-}Chi Cheung},\n title = {A survey on dependability improvement techniques for pervasive computing\n systems},\n journal = {{SCIENCE} {CHINA} Information Sciences},\n volume = {58},\n number = {5},\n pages = {1--14},\n year = {2015},\n url = {http://dx.doi.org/10.1007/s11432-015-5300-3},\n doi = {10.1007/s11432-015-5300-3},\n timestamp = {Wed, 29 Apr 2015 12:35:39 +0200},\n biburl = {http://dblp.uni-trier.de/rec/bib/journals/chinaf/YangLXC15},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Efficient Validation of Self-adaptive Applications by Counterexample Probability Maximization","date":"2018","authors":["Wenhua Yang","Chang Xu","Minxue Pan","Chun Cao","Xiaoxing Ma","Jian Lu"],"venue":"The Journal of Systems and Software (JSS)","venueShort":"JSS","tags":[],"abstract":"\n Self-adaptive applications’ executions can be affected by uncertainty factors like unreliable sensing and flawed adaptation and therefore often error-prone. Existing methods can verify the applications suffering uncertainty and report counterexamples. However, such verification results can deviate from reality when the uncertainty specification used in verification is itself imprecise. This thus calls for further validation of reported counterexamples. One outstanding challenge in counterexample validation is that the probabilities of counterex- amples occurring in real environment are usually very low, which makes the validation extremely inefficient. In this paper, we propose a novel approach to systematically deriving path-equivalent counterexamples with respect to origi- nal ones. 
The derived counterexamples guarantee to have higher probabilities, making them capable of being validated efficiently in field test. We evaluated our approach with real-world self-adaptive applications. The results reported that our approach significantly increased counterexample probabilities, and the derived counterexamples were also consistently and efficiently validated in both real environment and simulation.\n ","paperUrl":"https://cs.nju.edu.cn/changxu/1_publications/JSS18.pdf","bibtex":"@inproceedings{yang2018jss,\n\tauthor = {Wenhua Yang, Chang Xu, Minxue Pan, Chun Cao, Xiaoxing Ma, and Jian Lu},\n\ttitle = {The Journal of Systems and Software (JSS)},\n\tyear = {2018},\n\tpages = {82-99}\n\t}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Improving Verification Accuracy of CPS by Modeling and Calibrating Interaction Uncertainty","date":"2018","authors":["Wenhua Yang","Chang Xu","Minxue Pan","Xiaoxing Ma","Jian Lu"],"venue":"ACM Transactions on Internet Technology (TOIT)","venueShort":"TOIT","tags":[],"abstract":"\n Cyber-Physical Systems (CPS) intrinsically combine hardware and physical systems with software and network, which are together creating complex and correlated interactions. CPS applications often experience uncertainty in interacting with environment through unreliable sensor. They can be faulty and exhibit runtime errors if developers have not considered environmental interaction uncertainty adequately. Existing work in verifying CPS applications ignores interaction uncertainty and thus may overlook uncertainty-related faults. To improve verification accuracy, in this article we propose a novel approach to verifying CPS applications with explicit modeling of uncertainty arisen in the interaction between them and the environment. Our approach builds an Interactive State Machine (ISM) network for a CPS application and models interaction uncertainty by error ranges and distributions. 
Then it encodes both the application and uncertainty models to SMT formula to leverage SMT solvers searching for counterexamples that represent application failures. The precision of uncertainty model can affect the verification results. However, it may be difficult to model interaction uncertainty precisely enough at the beginning, because of the uncontrollable noise of sensors and insufficient data sample size. To further improve the accuracy of the verification results, we propose an approach to identifying and calibrating imprecise uncertainty models. We exploit the inconsistency between the counterexamples’ estimate and actual occurrence probabilities to identify possible imprecision in uncertainty models, and the calibration of imprecise models is to minimize the inconsistency, which is reduced to a Search- Based Software Engineering (SBSE) problem. We experimentally evaluated our verification and calibration approaches with real-world CPS applications, and the experimental results confirmed their effectiveness and efficiency.\n ","paperUrl":"https://cs.nju.edu.cn/changxu/1_publications/TOIT18.pdf","bibtex":"@inproceedings{yang2018toit,\n\tauthor = {Wenhua Yang, Chang Xu, Minxue Pan, Xiaoxing Ma, and Jian Lu},\n\ttitle = {Improving Verification Accuracy of CPS by Modeling and Calibrating Interaction Uncertainty},\n\tjournal = {ACM Transactions on Internet Technology (TOIT)},\n\tyear = {2018},\n\tpages = {1-37}\n\t}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Is Spreadsheet Ambiguity Harmful? Detecting and Repairing Spreadsheet Smells due to Ambiguous Computation","date":"2014","authors":["Wensheng Dou","Shing-Chi Cheung","Jun Wei"],"venue":"36th International Conference on Software Engineering (ICSE 2014), Hyderabad, India, May-Jun 2014","venueShort":"ICSE","tags":[],"abstract":"\n Spreadsheets are widely used by end users for numerical computation in their business. 
Spreadsheet cells whose computation is subject to the same semantics are often clustered in a row or column. When a spreadsheet evolves, these cell clusters can degenerate due to ad hoc modifications or undisciplined copy-and-pastes. Such degenerated clusters no longer keep cells prescribing the same computational semantics, and are said to exhibit ambiguous computation smells. Our empirical study finds that such smells are common and likely harmful. We propose AmCheck, a novel technique that automatically detects and repairs ambiguous computation smells by recovering their intended computational semantics. A case study using AmCheck suggests that it is useful for discovering and repairing real spreadsheet problems.\n ","paperUrl":"http://dl.acm.org/citation.cfm?doid=2568225.2568316","bibtex":"@inproceedings{DBLP:conf/icse/DouCW14,\n author = {Wensheng Dou and\n Shing{-}Chi Cheung and\n Jun Wei},\n title = {Is spreadsheet ambiguity harmful? detecting and repairing spreadsheet\n smells due to ambiguous computation},\n booktitle = {36th International Conference on Software Engineering, {ICSE} '14,\n Hyderabad, India - May 31 - June 07, 2014},\n pages = {848--858},\n year = {2014},\n crossref = {DBLP:conf/icse/2014},\n url = {http://doi.acm.org/10.1145/2568225.2568316},\n doi = {10.1145/2568225.2568316},\n timestamp = {Mon, 14 Sep 2015 15:13:50 +0200},\n biburl = {http://dblp.uni-trier.de/rec/bib/conf/icse/DouCW14},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Detecting Table Clones and Smells in Spreadsheets","date":"2016","authors":["Wensheng Dou","Shing-Chi Cheung","Chushu Gao","Chang Xu","Liang Xu","Jun Wei"],"venue":"24th ACM SIGSOFT International Symposium on the Foundations of Software Engineering (FSE 2016), Seattle, WA, USA, Nov 2016","venueShort":"FSE ","tags":[],"abstract":"\n Spreadsheets are widely used by end users for various business tasks, such as data 
analysis and financial reporting. End users may perform similar tasks by cloning a block of cells (table) in their spreadsheets. The corresponding cells in these cloned tables are supposed to keep the same or similar computational semantics. However, when spreadsheets evolve, thus cloned tables can become inconsistent due to ad-hoc modifications, and as a result suffer from smells. In this paper, we propose TableCheck to detect table clones and related smells due to inconsistency among them. We observe that two tables with the same header information at their corresponding cells are likely to be table clones. Inspired by existing finger-print-based code clone detection techniques, we developed a detection algorithm to detect this kind of table clones. We further detected outliers among corresponding cells as smells in the detected table clones. We implemented our idea into TableCheck, and applied it to real-world spreadsheets from the EUSES corpus. Experimental results show that table clones commonly exist (21.8%), and 25.6% of the spreadsheets with table clones suffer from smells due to inconsistency among these clones. 
TableCheck detected table clones and their smells with a precision of 92.2% and 85.5%, respectively, while existing techniques detected no more than 35.6% true smells that TableCheck could detect.\n ","paperUrl":"http://sccpu2.cse.ust.hk/castle/materials/fse16main-mainid258-p-e95dd6b-29549-preprint.pdf","slidesUrl":"http://sccpu2.cse.ust.hk/castle/materials/TableCheck_2016_11-17-1.pdf","bibtex":"@inproceedings{Dou_FSE16,\n\tauthor = {Wensheng Dou and Shing{-}Chi Cheung and Chushu Gao and Chang Xu and Liang Xu and Jun Wei},\n\ttitle = {Detecting Table Clones and Smells in Spreadsheets},\n\tbooktitle = {Proceedings of the 2016 International Symposium on the Foundations of Software Engineering, {FSE} 2016},\n\tyear = {2016}\n}","arxivUrl":null,"projectUrl":null,"awards":[]},{"title":"VEnron: A Versioned Spreadsheet Corpus and Related Evolution Analysis","date":"2016","authors":["Wensheng Dou","Liang Xu","Shing-Chi Cheung","Chushu Gao","Jun Wei","Tao Huang"],"venue":"38th International Conference on Software Engineering (ICSE 2016 - SEIP), Companion Volume, Austin, TX, USA, May 2016","venueShort":"ICSE SEIP","tags":[],"abstract":"\n In this paper, we propose a semi-automated approach that leverages spreadsheets’ contexts (e.g., attached emails) and contents to identify evolved spreadsheets and recover the embedded version information. We apply it to the released email archive of the Enron Corporation and build an industrial-scale, versioned spreadsheet corpus VEnron. Our approach first clusters spreadsheets that likely evolved from one to another into evolution groups based on various fragmented information, such as spreadsheet filenames, spreadsheet contents, and spreadsheet-attached emails. Then, it recovers the version information of the spreadsheets in each evolution group. VEnron enables us to identify interesting issues that can arise from spreadsheet evolution. 
For example, the versioned spreadsheets popularly exist in the Enron email archive; changes in formulas are common; and some groups (16.9%) can introduce new errors during evolution.\nAccording to our knowledge, VEnron is the first spreadsheet corpus with version information. It provides a valuable resource to understand issues arising from spreadsheet evolution.\n ","paperUrl":"http://delivery.acm.org/10.1145/2890000/2889238/p162-dou.pdf?ip=175.159.126.8&id=2889238&acc=ACTIVE%20SERVICE&key=CDD1E79C27AC4E65%2EFC30B8D6EF32B758%2E4D4702B0C3E38B35%2E4D4702B0C3E38B35&CFID=836117825&CFTOKEN=34377724&__acm__=1473671848_9a79ceac0a81a74ac3ee0d6561cb8330","projectUrl":"http://sccpu2.cse.ust.hk/venron/","bibtex":"@inproceedings{Dou_ICSE2016,\n author = {Wensheng Dou and\n Liang Xu and\n Shing{-}Chi Cheung and\n Chushu Gao and\n Jun Wei and\n Tao Huang},\n title = {VEnron: a versioned spreadsheet corpus and related evolution analysis},\n booktitle = {Proceedings of the 38th International Conference on Software Engineering,\n {ICSE} 2016, Austin, TX, USA, May 14-22, 2016 - Companion Volume},\n pages = {162--171},\n year = {2016},\n crossref = {DBLP:conf/icse/2016c},\n url = {http://doi.acm.org/10.1145/2889160.2889238},\n doi = {10.1145/2889160.2889238},\n timestamp = {Sun, 15 May 2016 12:23:10 +0200},\n biburl = {http://dblp.uni-trier.de/rec/bib/conf/icse/DouXCGWH16},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"CACheck: Detecting and Repairing Cell Arrays in Spreadsheets","date":"2017","authors":["Wensheng Dou","Chang Xu","Shing-Chi Cheung","Jun Wei"],"venue":"IEEE Transactions on Software Engineering (TSE)","venueShort":"TSE","tags":[],"abstract":"\n Spreadsheets are widely used by end users for numerical computation in their business. Spreadsheet cells whose computation is subject to the same semantics are often clustered in a row or column as a cell array. 
When a spreadsheet evolves, the cells in a cell array can degenerate due to ad hoc modifications. Such degenerated cell arrays no longer keep cells prescribing the same computational semantics, and are said to exhibit ambiguous computation smells. We propose CACheck, a novel technique that automatically detects and repairs smelly cell arrays by recovering their intended computational semantics. Our empirical study on the EUSES and Enron corpora finds that such smelly cell arrays are common. Our study also suggests that CACheck is useful for detecting and repairing real spreadsheet problems caused by smelly cell arrays. Compared with our previous work AmCheck, CACheck detects smelly cell arrays with higher precision and recall rate.\n ","paperUrl":"https://doi.org/10.1109/TSE.2016.2584059","bibtex":"@article{Dou_TSE17,\n\tauthor = {Wensheng Dou, Chang Xu, Shing-Chi Cheung and Jun Wei},\n\ttitle = {CACheck: Detecting and Repairing Cell Arrays in Spreadsheets},\n\tjournal = {IEEE Transactions on Software Engineering (TSE)},\n\tyear = {2017}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"GreenDroid: Automated Diagnosis of Energy Inefficiency for Smartphone Applications","date":"2014","authors":["Yepang Liu","Chang Xu","Shing-Chi Cheung","Jian Lv"],"venue":"IEEE Transactions on Software Engineering 40(9), September 2014","venueShort":"TSE","tags":[],"abstract":"\n Smartphone applications’ energy efficiency is vital, but many Android applications suffer from serious energy inefficiency problems. Locating these problems is labor-intensive and automated diagnosis is highly desirable. However, a key challenge is the lack of a decidable criterion that facilitates automated judgment of such energy problems. Our work aims to address this challenge. 
We conducted an in-depth study of 173 open-source and 229 commercial Android applications, and observed two common causes of energy problems: missing deactivation of sensors or wake locks, and cost-ineffective use of sensory data. With these findings, we\npropose an automated approach to diagnosing energy problems in Android applications. Our approach explores an application’s state space by systematically executing the application using Java PathFinder (JPF). It monitors sensor and wake lock operations to detect missing deactivation of sensors and wake locks. It also tracks the transformation and usage of sensory data and judges whether they are effectively utilized by the application using our state-sensitive data utilization metric. In this way, our approach can generate detailed reports with actionable information to assist developers in validating detected energy problems. We built our approach as a tool, GreenDroid, on top of JPF. Technically, we addressed the challenges of generating user interaction events and scheduling event han- dlers in extending JPF for analyzing Android applications. We evaluated GreenDroid using 13 real-world popular Android applications. GreenDroid completed energy efficiency diagnosis for these applications in a few minutes. 
It successfully located real energy problems in these applications, and additionally found new unreported energy problems that were later confirmed by developers.\n ","paperUrl":"http://sccpu2.cse.ust.hk/andrewust/files/TSE2014.pdf","projectUrl":"http://sccpu2.cse.ust.hk/greendroid/","bibtex":"@ARTICLE{Liu:TSE2014, \n author = {Liu, Yepang and Xu, Chang and Cheung, Shing-Chi and Lu, Jian}, \n journal = {IEEE Transactions on Software Engineering}, \n title = {GreenDroid: Automated Diagnosis of Energy Inefficiency for Smartphone Applications}, \n year = {2014}, \n volume = {40}, \n number = {9}, \n pages = {911-940}, \n doi = {10.1109/TSE.2014.2323982}, \n month = {Sept},\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"CHECKERDROID: Automated Quality Assurance for Smartphone Applications","date":"2014","authors":["Yepang Liu","Chang Xu","Shing-Chi Cheung","Wenhua Yang"],"venue":"International Journal of Software and Informatics (IJSI)","venueShort":"IJSI","tags":[],"abstract":"\n Smartphone applications’ quality is vital. However, many smartphone applications on market suffer from various bugs. One major reason is that developers lack viable techniques to help expose potential bugs in their applications. This paper presents a practical dynamic analysis tool, CheckerDroid, to help developers automatically detect both functional and non-functional bugs in their Android applications. CheckerDroid currently supports the detection of the following three types of bugs: null pointer exception, resource leak and sensor listener misusage. We built CheckerDroid by extending Java PathFinder (JPF), a widely-used model checker for general Java programs. Our extension addresses two technical challenges. First, Android applications are event-driven and lack explicit control flow information between event handlers. Second, Android applications closely hinge on native framework libraries, whose implementations are platform-dependent. 
To address these challenges, we derive event handler scheduling policies from Android documentations, and encode them to guide CheckerDroid to realistically execute Android applications. Besides, we modeled the side effects for a critical set of Android APIs such that CheckerDroid can conduct bug detection precisely. To evaluate CheckerDroid, we conducted experiments with seven popular real-world Android applications. CheckerDroid analyzed these applications in a few minutes, and successfully located real bugs in them.\n ","paperUrl":"http://sccpu2.cse.ust.hk/andrewust/files/IJSI2014.pdf","bibtex":"@article{DBLP:journals/ijsi/LiuXCY14,\n author = {Yepang Liu and\n Chang Xu and\n S. C. Cheung and\n Wenhua Yang},\n title = {{CHECKERDROID} : Automated Quality Assurance for Smartphone Applications},\n journal = {Int. J. Software and Informatics},\n volume = {8},\n number = {1},\n pages = {21--41},\n year = {2014},\n url = {http://www.ijsi.org/ch/reader/view_abstract.aspx?file_no=i181},\n timestamp = {Sun, 14 Aug 2016 14:06:59 +0200},\n biburl = {http://dblp.uni-trier.de/rec/bib/journals/ijsi/LiuXCY14},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Characterizing and Detecting Performance Bugs for Smartphone Applications","date":"2014","authors":["Yepang Liu","Chang Xu","Shing-Chi Cheung"],"venue":"36th International Conference on Software Engineering (ICSE 2014), Hyderabad, India, May-Jun 2014","venueShort":"ICSE","tags":["Android","Empirical Study"],"awards":["Distinguished Paper"],"abstract":"\n Smartphone applications’ performance has a vital impact on user experience. However, many smartphone applications suffer from bugs that cause significant performance degradation, thereby losing their competitive edge. Unfortunately, people have little understanding of these performance bugs. They also lack effective techniques to fight with such bugs. 
To bridge this gap, we conducted a study of 70 real-world performance bugs collected from eight large-scale and popular Android applications. We studied the characteristics (e.g., bug types and how they manifested) of these bugs and identified their common patterns. These findings can support follow-up research on performance bug avoidance, testing, debugging and analysis for smartphone applications. To demonstrate the usefulness of our findings, we implemented a static code analyzer, PerfChecker, to detect our identified performance bug patterns. We experimentally evaluated PerfChecker by applying it to 29 popular Android applications, which comprise 1.1 million lines of Java code. PerfChecker successfully detected 126 matching instances of our performance bug patterns. Among them, 68 were quickly confirmed by developers as previouslynunknown issues that affect application performance, and 20 were fixed soon afterwards by following our optimization suggestions.\n ","paperUrl":"http://sccpu2.cse.ust.hk/andrewust/files/ICSE2014.pdf","projectUrl":"http://sccpu2.cse.ust.hk/perfchecker","bibtex":"@inproceedings{DBLP:conf/icse/LiuXC14,\n author = {Yepang Liu and\n Chang Xu and\n Shing{-}Chi Cheung},\n title = {Characterizing and detecting performance bugs for smartphone applications},\n booktitle = {36th International Conference on Software Engineering, {ICSE} '14,\n Hyderabad, India - May 31 - June 07, 2014},\n pages = {1013--1024},\n year = {2014},\n crossref = {DBLP:conf/icse/2014},\n url = {http://doi.acm.org/10.1145/2568225.2568229},\n doi = {10.1145/2568225.2568229},\n timestamp = {Sun, 18 May 2014 16:12:57 +0200},\n biburl = {http://dblp.uni-trier.de/rec/bib/conf/icse/LiuXC14},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"slidesUrl":null},{"title":"Diagnosing Energy Efficiency and Performance for Mobile Internetware Applications: Challenges and Opportunities","date":"2015","authors":["Yepang Liu","Chang Xu","Shing-Chi 
Cheung"],"venue":"IEEE Software 32(1), Jan/Feb 2015","venueShort":"IEEE SOFTWARE","tags":[],"abstract":"\n Many smartphone applications' smart services are realized in a way that wastes energy or degrades performance, seriously affecting the user experience. What's worse, developers lack powerful tools to combat such problems, curbing the growth of Internet-based mobile computing. Research communities and industries have issued a strong call for effective techniques to diagnose energy and performance bugs in smartphone applications. This article describes bug characteristics, discusses diagnostic challenges, and reviews state-of-the-art diagnostic techniques. A case study shows how a representative tool analyzed commercial Android applications and the Samsung Mobile Software Developer's Kit, providing useful diagnostic information.\n ","paperUrl":"http://sccpu2.cse.ust.hk/andrewust/files/ieeesoft15.pdf","bibtex":"@article{DBLP:journals/software/LiuXC15,\n author = {Yepang Liu and\n Chang Xu and\n Shing{-}Chi Cheung},\n title = {Diagnosing Energy Efficiency and Performance for Mobile Internetware\n Applications},\n journal = {{IEEE} Software},\n volume = {32},\n number = {1},\n pages = {67--75},\n year = {2015},\n url = {http://dx.doi.org/10.1109/MS.2015.4},\n doi = {10.1109/MS.2015.4},\n timestamp = {Tue, 12 Jan 2016 12:01:52 +0100},\n biburl = {http://dblp.uni-trier.de/rec/bib/journals/software/LiuXC15},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Understanding and Detecting Wake Lock Misuses for Android Applications","date":"2016","authors":["Yepang Liu","Chang Xu","Shing-Chi Cheung","Valerio Terrangi"],"venue":"24th ACM SIGSOFT International Symposium on the Foundations of Software Engineering (FSE 2016), Seattle, WA, USA, Nov 2016","venueShort":"FSE ","tags":[],"abstract":"\n Wake locks are widely used in Android apps to protect critical computations from being 
disrupted by device sleeping. Inappropriate use of wake locks often seriously impacts user experience. However, little is known on how wake locks are used in real-world Android apps and the impact of their misuses. To bridge the gap, we conducted a large-scale empirical study on 44,736 commercial and 31 open-source Android apps. By automated program analysis and manual investigation, we observed (1) common program points where wake locks are acquired and released, (2) 13 types of critical computational tasks that are often protected by wake locks, and (3) eight patterns of wake lock misuses that commonly cause functional and non-functional issues, only three of which had been studied by existing work. Based on our findings, we designed a static analysis technique, Elite, to detect two most common patterns of wake lock misuses. Our experiments on real-world subjects showed that Elite is effective and can outperform two state-of-the-art techniques.\n ","paperUrl":"http://sccpu2.cse.ust.hk/andrewust/files/FSE2016.pdf","slidesUrl":"http://sccpu2.cse.ust.hk/castle/materials/ELITE-FSE2016-V3.pdf","bibtex":"@inproceedings{Liu_FSE16,\n\tauthor = {Yepang Liu and Chang Xu and\n\t\t \t Shing{-}Chi Cheung and Valerio Terragni},\n\ttitle = {Understanding and Detecting Wake Lock Misuses for Android Applications},\n\tbooktitle = {Proceedings of the 2016 International Symposium on the Foundations of Software Engineering, {FSE} 2016},\n\tyear = {2016}\n}","arxivUrl":null,"projectUrl":null,"awards":[]},{"title":"DroidLeaks: a comprehensive database of resource leaks in Android apps","date":"2019","authors":["Yepang Liu","Jue Wang","Lili Wei","Chang Xu","Shing-Chi Cheung","Tianyong Wu","Jun Yan","Jian Zhang"],"venue":"Empirical Software Engineering 2019","venueShort":"EmSE","tags":[],"abstract":"\n Resource leaks in Android apps are pervasive. They can cause serious performance degradation and system crashes. 
In recent years, many resource leak detection techniques have been proposed to help Android developers correctly manage system resources. Yet, there exist no common databases of real-world bugs for effectively comparing such techniques to understand their strengths and limitations. This paper describes our effort towards constructing such a bug database named DROIDLEAKS. To extract real resource leak bugs, we mined 124,215 code revisions of 34 popular open-source Android apps. After automated filtering and manual validation, we successfully found 292 fixed resource leak bugs, which cover a diverse set of resource classes, from 32 analyzed apps. To understand these bugs, we conducted an empirical study, which revealed the characteristics of resource leaks in Android apps and common patterns of resource management mistakes made by developers. To further demonstrate the usefulness of our work, we evaluated eight resource leak detectors from both academia and industry on DROIDLEAKS and performed a detailed analysis of their performance. 
We release DROIDLEAKS for public access to support future research.\n ","paperUrl":"https://link.springer.com/article/10.1007/s10664-019-09715-8","projectUrl":"https://zenodo.org/record/2589909#.XfxlvZP7TOR","bibtex":"@article{DBLP:journals/ese/LiuWWXCWYZ19,\n author = {Yepang Liu and\n Jue Wang and\n Lili Wei and\n Chang Xu and\n Shing{-}Chi Cheung and\n Tianyong Wu and\n Jun Yan and\n Jian Zhang},\n title = {DroidLeaks: a comprehensive database of resource leaks in Android\n apps},\n journal = {Empirical Software Engineering},\n volume = {24},\n number = {6},\n pages = {3435--3483},\n year = {2019},\n url = {https://doi.org/10.1007/s10664-019-09715-8},\n doi = {10.1007/s10664-019-09715-8},\n timestamp = {Thu, 19 Dec 2019 09:26:48 +0100},\n biburl = {https://dblp.org/rec/bib/journals/ese/LiuWWXCWYZ19},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Automatic Software Refactoring via Weighted Clustering in Method-level Networks","date":"2018","authors":["Ying Wang","Hai Yu","Zhiliang Zhu","Wei Zhang","Yuli Zhao"],"venue":"IEEE Transactions on Software Engineering (TSE)","venueShort":"TSE","tags":[],"abstract":"\n In this study, we describe a system-level multiple refactoring algorithm, which can identify the move method, move field, and extract class refactoring opportunities automatically according to the principle of “high cohesion and low coupling.” The algorithm works by merging and splitting related classes to obtain the optimal functionality distribution from the system-level. Furthermore, we present a weighted clustering algorithm for regrouping the entities in a system based on merged method-level networks. Using a series of preprocessing steps and preconditions, the “bad smells” introduced by cohesion and coupling problems can be removed from both the non-inheritance and inheritance hierarchies without changing the code behaviors. 
We rank the refactoring suggestions based on the anticipated benefits that they bring to the system. Based on comparisons with related research and assessing the refactoring results using quality metrics and empirical evaluation, we show that the proposed approach performs well in different systems and is beneficial from the perspective of the original developers. Finally, an open source tool is implemented to support the proposed approach.\n ","paperUrl":"materials/TSE18-ying.pdf","projectUrl":"https://github.com/wangying8052/REsolution_runnable-JAR-File","bibtex":"@article{wang2018automatic,\n title={Automatic Software Refactoring via Weighted Clustering in Method-Level Networks},\n author={Ying, Wang and Hai, Yu and Zhiliang, Zhu and Wei, Zhang and Yuli, Zhao},\n journal={IEEE Transactions on Software Engineering},\n volume={44},\n number={3},\n pages={202--236},\n year={2018},\n publisher={IEEE}\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Risk Analysis on Multi-granular Network for Software Integration Testing","date":"2018","authors":["Ying Wang","Zhiliang Zhu","Hai Yu"],"venue":"IEEE Transactions on Circuits and Systems II: Express Briefs (TCAS2)","venueShort":"TCAS2","tags":[],"abstract":"\n This brief presents a model, a methodology, and an application scheme of risk assessment for information exchange system. The multi-granular flow network (MGFN) model serves as a basis for measuring the vulnerabilities and threats of components, and the failure consequences they bring to the system when a failure occurs. The risk factors of components are then quantified, assisted by a probabilistic risk analysis model. Furthermore, we apply the MGFN model and the risk assessment scheme in ordering class integration testing for object-oriented software system. 
By comparing our approach with the state-of-the-art integration test order algorithms from the perspectives of detection efficiency of severe faults and stubbing efforts, we show that classes with higher risk indexes can be tested in earlier integration steps, and that the total complexity of the established test stubs is minimized.\n ","paperUrl":"materials/TCAS218-ying.pdf","bibtex":"@article{wang2018risk,\n title={Risk Analysis on Multi-Granular Flow Network for Software Integration Testing},\n author={Ying, Wang and Zhiliang, Zhu and Hai, Yu and Bo, Yang},\n journal={IEEE Transactions on Circuits and Systems II: Express Briefs},\n volume={65},\n number={8},\n pages={1059--1063},\n year={2018},\n publisher={IEEE}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Do the Dependency Conflicts in My Project Matter?","date":"2018","authors":["Ying Wang","Ming Wen","Zhenwei Liu","Rongxin Wu","Rui Wang","Bo Yang","Hai Yu","Zhiliang Zhu","Shing-Chi Cheung"],"venue":" The ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering, Technical Research Paper, Lake Buena Vista, Florida, 4 Nov - 9 Nov 2018","venueShort":"ESEC/FSE","tags":[],"abstract":"\n Intensive dependencies of a Java project on third-party libraries can easily lead to the presence of multiple library or class versions on its classpath. When this happens, JVM will load one version and shadows the others. Dependency conflict (DC) issues occur when the loaded version fails to cover a required feature (e.g., method) referenced by the project, thus causing runtime exceptions. However, the warnings of duplicate classes or libraries detected by existing build tools such as Maven can be benign since not all instances of duplication will induce runtime exceptions, and hence are often ignored by developers. In this paper, we conducted an empirical study on real-world DC issues collected from large open source projects. 
We studied the manifestation and fixing patterns of DC issues. Based on our findings, we designed Decca, an automated detection tool that assesses DC issues' severity and filters out the benign ones. Our evaluation results on 30 projects show that Decca achieves a precision of 0.923 and recall of 0.766 in detecting high-severity DC issues. Decca also detected new DC issues in these projects. Subsequently, 20 DC bug reports were filed, and 11 of them were confirmed by developers. Issues in 6 reports were fixed with our suggested patches.\n ","paperUrl":"materials/fse18-ying.pdf","projectUrl":"https://deccadc.github.io/fse18/","slidesUrl":"materials/fse18-ying-slides.pdf","bibtex":"@inproceedings{wang2018conflict,\n title={Do the Dependency Conflicts in My Project Matter?},\n author={Wang, Ying and Wen, Ming and Liu, Zhenwei and Wu, Rongxin and Wang, Rui and Yang, Bo and Yu, Hai and Zhu, Zhiliang and Cheung, Shing-Chi},\n booktitle={Proceedings of the 2018 26th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE 2018)},\n pages={1--12},\n year={2018},\n organization={ACM}\n}","arxivUrl":null,"awards":[]},{"title":"Using Risk Analysis to Prioritize Test Cases","date":"2018","authors":["Ying Wang","Hai Yu","Zhiliang Zhu"],"venue":"Journal of Systems and Software (JSS)","venueShort":"JSS","tags":[],"abstract":"\n In this paper, we present a risk-based test case prioritization (Ri-TCP) algorithm based on the transmission of information flows among software components. Most of the existing approaches rely on the historical code changes or test case execution data, few of them effectively use the system topology information covered by test cases when scheduling the execution of test cases. From the perspective of code structure, the proposed algorithm firstly maps software into an information flow-based directed network model. 
Then, functional paths covered by each test case are represented by a set of barbell motifs. Finally, combining with probabilistic risk analysis (PRA) and fault tree model, we assign a priority to each test case by calculating the sum of risk indexes of all the barbells covered by it. Experimental results demonstrate that Ri-TCP technique has a higher detection rate of faults with serious risk indicators and performs stably in different systems, compared with the other state-of-the-art algorithms.\n ","paperUrl":"materials/JSS18-ying.pdf","bibtex":"@article{wang2018using,\n title={Using reliability risk analysis to prioritize test cases},\n author={Ying, Wang and Zhiliang, Zhu and Bo,Yang and Fangda, Guo and Hai,Yu},\n journal={Journal of Systems and Software},\n volume={139},\n pages={14--31},\n year={2018},\n publisher={Elsevier}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Could I Have a Stack Trace to Examine the Dependency Conflict Issue?","date":"2019","authors":["Ying Wang","Ming Wen","Rongxin Wu","Zhenwei Liu","Shin Hwei Tan","Zhiliang Zhu","Hai Yu","Shing-Chi Cheung"],"venue":"International Conference on Software Engineering 2019, Technical Research Paper, Montréal, QC, Canada, 25 May - 31 May","venueShort":"ICSE","tags":[],"abstract":"\n Intensive use of libraries in Java projects brings potential risk of dependency conflicts, which occur when a project directly or indirectly depends on multiple versions of the same library or class. When this happens, JVM loads one version and shadows the others. Runtime exceptions can occur when methods in the shadowed versions are referenced. Although project management tools such as Maven are able to give warnings of potential dependency conflicts when a project is built, developers often ask for crashing stack traces before examining these warnings. 
It motivates us to develop RIDDLE, an automated approach that generates tests and collects crashing stack traces for projects subject to risk of dependency conflicts. RIDDLE, built on top of ASM and EVOSUITE, combines condition mutation, search strategies and condition restoration. We applied RIDDLE on 19 real-world Java projects with duplicate libraries or classes. We reported 20 identified dependency conflicts including their induced crashing stack traces and the details of generated tests. Among them, 15 conflicts were confirmed by developers as real issues, and 10 were readily fixed. The evaluation results demonstrate the effectiveness and usefulness of RIDDLE.\n ","paperUrl":"materials/ICSE19-ying.pdf","projectUrl":"https://skillwind.github.io/RiddleDC/index.html","bibtex":"@inproceedings {WANG2019STACK,\n title = {{Could I Have a Stack Trace to Examine the Dependency Conflict Issue?}},\n author = {Ying, Wang and Ming, Wen and Rongxin, Wu and Zhenwei, Liu and Shin Hwei, Tan and Zhiliang, Zhu and Hai, Yu and Shing-Chi, Cheung},\n booktitle = {{Proceedings of the 41st International Conference on Software Engineering}},\n series = {ICSE 2019},\n year = {2019},\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Scaling Up Symbolic Analysis by Removing Z-Equivalent States","date":"2014","authors":["Yueqi Li","Shing-Chi Cheung","Xiangyu Zhang","Yepang Liu"],"venue":"ACM Transactions on Software Engineering and Methodology 23(4), August 2014","venueShort":"TOSEM","tags":[],"abstract":"\n Path explosion is a major issue in applying path-sensitive symbolic analysis to large programs. We observe that many symbolic states generated by the symbolic analysis of a procedure are indistinguishable to its callers. It is, therefore, possible to keep only one state from each set of equivalent symbolic states without affecting the analysis result. 
Based on this observation, we propose an equivalence relation called z-equivalence, which is weaker than logical equivalence, to relate a large number of z-equivalent states. We prove that z-equivalence is strong enough to guarantee that paths to be traversed by the symbolic analysis of two z-equivalent states are identical, giving the same solutions to satisfiability and validity queries. We propose a sound linear algorithm to detect z-equivalence. Our experiments show that the symbolic analysis that leverages z-equivalence is able to achieve more than ten orders of magnitude reduction in terms of search space. The reduction significantly alleviates the path explosion problem, enabling us to apply symbolic analysis in large programs such as Hadoop and Linux Kernel.\n ","paperUrl":"http://sccpu2.cse.ust.hk/andrewust/files/tosem14.pdf","bibtex":"@article{Li:TOSEM2014,\n author = {Li, Yueqi and Cheung, Shing-Chi and Zhang, Xiangyu and Liu, Yepang},\n title = {Scaling Up Symbolic Analysis by Removing Z-Equivalent States},\n journal = {ACM Trans. Softw. Eng. Methodol.},\n issue_date = {August 2014},\n volume = {23},\n number = {4},\n month = sep,\n year = {2014},\n pages = {34:1--34:32},\n articleno = {34},\n url = {http://doi.acm.org/10.1145/2652484},\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Symbolic State Validation through Runtime Data","date":"2014","authors":["Yueqi Li","Shing-Chi Cheung"],"venue":"29th IEEE/ACM International Conference on Automated Software Engineering (ASE 2014), Vasteras, Sweden, September 2014","venueShort":"ASE","tags":[],"abstract":"\n Real world programs are typically built on top of many library functions. Symbolic analysis of these programs generally requires precise models of these functions' Application Programming Interfaces (APIs), which are mostly unavailable because these models are costly to construct. 
A variant approach of symbolic analysis is to over-approximate the return values of those APIs that have not been modeled. However, such approximation can induce many unreachable symbolic states, which are expensive to validate manually. In this paper, we propose a static approach to automatically validating the reported anomalous symbolic states. The validation makes use of the available runtime data of the un-modeled APIs collected from previous program executions. We show that the symbolic state validation problem can be cast as a MAX-SAT problem and solved by existing constraint solvers.\n\nOur approach is motivated by two observations. We may bind the symbolic parameters in un-modeled APIs based on observations made in former executions by other programs. The binding enables us to use the corresponding observed concrete return values of APIs to validate the symbolic states arising from the over-approximated return values of the un-modeled APIs. Second, some symbolic constraints can be accurately evaluated despite the imprecision of the over-approximated symbolic values.\n\nOur technique found 80 unreported bugs when it was applied to 10 popular programs with a total of 1.5 million lines of code. All of them can be confirmed by test cases. Our technique presents a promising way to apply the big data paradigm to software engineering. 
It provides a mechanism to validate the symbolic states of a project by leveraging the many concrete input-output values of APIs collected from other projects.\n ","paperUrl":"http://dl.acm.org/citation.cfm?doid=2642937.2642973","bibtex":"@inproceedings{DBLP:conf/kbse/LiC14,\n author = {Yueqi Li and\n Shing{-}Chi Cheung},\n title = {Symbolic state validation through runtime data},\n booktitle = {{ACM/IEEE} International Conference on Automated Software Engineering,\n {ASE} '14, Vasteras, Sweden - September 15 - 19, 2014},\n pages = {187--198},\n year = {2014},\n crossref = {DBLP:conf/kbse/2014},\n url = {http://doi.acm.org/10.1145/2642937.2642973},\n doi = {10.1145/2642937.2642973},\n timestamp = {Fri, 07 Nov 2014 12:44:47 +0100},\n biburl = {http://dblp.uni-trier.de/rec/bib/conf/kbse/LiC14},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"An Empirical Study on TensorFlow Program Bugs","date":"2018","authors":["Yuhao Zhang","Yifan Chen","Shing-Chi Cheung","Yingfei Xiong","Lu Zhang"],"venue":"International Symposium on Software Testing and Analysis, Amsterdam Netherlands, July 2018","venueShort":"ISSTA","tags":[],"abstract":"\n Deep learning applications become increasingly popular in important domains such as self-driving systems and facial identity systems. Defective deep learning applications may lead to catastrophic consequences. Although recent research efforts were made on testing and debugging deep learning applications, the characteristics of deep learning defects have never been studied. To fill this gap, we studied deep learning applications built on top of TensorFlow and collected program bugs related to TensorFlow from StackOverflow QA pages and Github projects. We extracted information from QA pages, commit messages, pull request messages, and issue discussions to examine the root causes and symptoms of these bugs. 
We also studied the strategies deployed by TensorFlow users for bug detection and localization. These findings help researchers and TensorFlow users to gain a better understanding of coding defects in TensorFlow programs and point out a new direction for future research.\n ","paperUrl":"materials/issta18main-p98-p.pdf","bibtex":"@inproceedings {ISSTA18,\n title = {{An Empirical Study on TensorFlow Program Bugs}},\n author = {Yuhao Zhang, Yifan Chen, Shing-Chi Cheung, Yingfei Xiong, Lu Zhang},\n booktitle = {{Proceedings of The ACM SIGSOFT International Symposium on Software Testing and Analysis}},\n year = {2018},\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"ReScue: Crafting Regular Expression DoS Attacks","date":"2018","authors":["Yuju Shen","Yanyan Jiang","Chang Xu","Ping Yu","Xiaoxing Ma","Jian Lu"],"venue":"2018 33rd ACM/IEEE International Conference on Automated Software Engineering (ASE '18), September 2018, Montpellier, France","venueShort":"ASE","tags":[],"abstract":"\n Regular expression (regex) with modern extensions is one of the most popular string processing tools. However, poorly-designed regexes can yield exponentially many matching steps, and lead to regex Denial-of-Service (ReDoS) attacks under well-conceived string inputs. This paper presents ReScue, a three-phase gray-box analytical technique, to automatically generate ReDoS strings to highlight vulnerabilities of given regexes. ReScue systematically seeds (by a genetic search), incubates (by another genetic search), and finally pumps (by a regex-dedicated algorithm) for generating strings with maximized search time. We implemented the ReScue tool and evaluated it against 29,088 practical regexes in real-world projects. 
The evaluation results show that ReScue found 49% more attack strings compared with the best existing technique, and applying ReScue to popular GitHub projects discovered ten previously unknown ReDoS vulnerabilities.\n ","paperUrl":"https://cs.nju.edu.cn/changxu/1_publications/ASE18.pdf","projectUrl":"http://2bdenny.github.io/ReScue/","bibtex":"@inproceedings{shen_rescue_2018,\n author = {Yuju Shen and Yanyan Jiang and Chang Xu and Ping Yu and Xiaoxing Ma and Jian Lu},\n title = {ReScue: Crafting regular expression DoS attacks},\n pages = {to appear},\n year = {2018},\n booktitle = {Proceedings of the 33rd International Conference on Automated Software Engineering (ASE)},\n pdf = {/spar/publication/shen_rescue_2018.pdf},\n code = {http://2bdenny.github.io/ReScue/},\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Analyzing and Disentangling Interleaved Interrupt-Driven IoT Programs","date":"2019","authors":["Yuxia Sun","Song Guo","Shing-Chi Cheung","Yong Tang"],"venue":"IEEE Internet of Things Journal 2019","venueShort":"IoT-J","tags":[],"abstract":"\n In the Internet of Things (IoT) community, wireless sensor network (WSN) is a key technique to enable ubiquitous sensing of environments and provide reliable services to applications. WSN programs, typically interrupt-driven, implement the functionalities via the collaboration of interrupt procedure instances (IPIs, namely executions of interrupt processing logic). However, due to the complicated concurrency model of WSN programs, the IPIs are interleaved intricately and the program behaviors are hard to predicate from the source codes. Thus, to improve the software quality of WSN programs, it is significant to disentangle the interleaved executions and develop various IPI-based program analysis techniques, including offline and online ones. As the common foundation of those techniques, a generic efficient and real-time algorithm to identify IPIs is urgently desired. 
However, the existing instance-identification approach cannot satisfy the desires. In this paper, we first formally define the concept of IPI. Next, we propose a generic IPI-identification algorithm, and prove its correctness, real-time, and efficiency. We also conduct comparison experiments to illustrate that our algorithm is more efficient than the existing one in terms of both time and space. As the theoretical analyses and empirical studies exhibit, our algorithm provides the groundwork for IPI-based analyses of WSN programs in IoT environment.\n ","paperUrl":"https://ieeexplore.ieee.org/document/8648188","bibtex":"@article{DBLP:journals/iotj/SunGCT19,\n author = {Yuxia Sun and\n Song Guo and\n Shing{-}Chi Cheung and\n Yong Tang},\n title = {Analyzing and Disentangling Interleaved Interrupt-Driven IoT Programs},\n journal = {{IEEE} Internet of Things Journal},\n volume = {6},\n number = {3},\n pages = {5376--5386},\n year = {2019},\n url = {https://doi.org/10.1109/JIOT.2019.2900769},\n doi = {10.1109/JIOT.2019.2900769},\n timestamp = {Fri, 05 Jul 2019 09:39:40 +0200},\n biburl = {https://dblp.org/rec/bib/journals/iotj/SunGCT19},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Disclosing and Locating Concurrency Bugs of Interrupt-Driven IoT Programs","date":"2019","authors":["Yuxia Sun","Shing-Chi Cheung","Song Guo","Ming Cheng"],"venue":"IEEE Internet of Things Journal 2019","venueShort":"IoT-J","tags":[],"abstract":"\n The Internet of Things (IoT) is envisioned as a distributed network formed by many end devices, e.g., the motes of wireless sensor network (WSN). These important IoT end devices enable ubiquitous sensing of environments and provide reliable services for mission-critical applications. 
However, programs running on WSN devices are typically interrupt-driven and prone to interrupt-induced concurrency bugs, which are primarily caused by erroneous interleavings among interrupt procedure instances (IPIs) (namely, executions of interrupt processing logic). In this paper, we use a set of dynamic bug patterns to characterize the concurrency bugs due to buggy access-interleavings among IPIs to shared resources, including shared memory locations and shared communication channels. By matching the above bug patterns, a dynamic analysis approach called disclosing and locating concurrency bugs of interrupt-driven IoT programs based on dynamic bug patterns (Daemon) is proposed to automatically detect and locate concurrency bugs in WSN programs. A GUI tool of Daemon is developed. As the empirical studies exhibit, the tool can discover concurrency bugs effectively and locate the buggy source lines visually.\n ","paperUrl":"https://ieeexplore.ieee.org/document/8746139","bibtex":"@article{DBLP:journals/iotj/SunCGC19,\n author = {Yuxia Sun and\n Shing{-}Chi Cheung and\n Song Guo and\n Ming Cheng},\n title = {Disclosing and Locating Concurrency Bugs of Interrupt-Driven IoT Programs},\n journal = {{IEEE} Internet of Things Journal},\n volume = {6},\n number = {5},\n pages = {8945--8957},\n year = {2019},\n url = {https://doi.org/10.1109/JIOT.2019.2925291},\n doi = {10.1109/JIOT.2019.2925291},\n timestamp = {Thu, 07 Nov 2019 09:19:37 +0100},\n biburl = {https://dblp.org/rec/bib/journals/iotj/SunCGC19},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Fuzzing Deep Learning Compilers with HirGen","date":"2023","authors":["Haoyang Ma","Qingchao Shen","Yongqiang Tian","Junjie Chen","Shing-Chi Cheung"],"venue":"ACM SIGSOFT International Symposium on Software Testing and Analysis","venueShort":"ISSTA","tags":["Deep Learning compiler 
testing"],"projectUrl":"https://zenodo.org/record/7905120#.ZKptii9ByJ8","paperUrl":null,"arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"StubCoder: Automated Generation and Repair of Stub Code for Mock Objects","date":"2023","authors":["Hengcheng Zhu","Lili Wei","Valerio Terragni","Yepang Liu","Shing-Chi Cheung","Jiarong Wu","Qin Sheng","Bing Zhang","Lihong Song"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","tags":["Mocking","Unit Test"],"abstract":"Mocking is an essential unit testing technique for isolating the class under test (CUT) from its dependencies. Developers often leverage mocking frameworks to develop stub code that specifies the behaviors of mock objects. However, developing and maintaining stub code is labor-intensive and error-prone. In this paper, we present StubCoder to automatically generate and repair stub code for regression testing. StubCoder implements a novel evolutionary algorithm that synthesizes test-passing stub code guided by the runtime behavior of test cases. We evaluated our proposed approach on 59 test cases from 13 open-source projects. 
Our evaluation results show that StubCoder can effectively generate stub code for incomplete test cases without stub code and repair obsolete test cases with broken stub code.","paperUrl":"https://doi.org/10.1145/3617171","projectUrl":"https://github.com/henryhchchc","bibtex":"@article{10.1145/3617171,\n author = {Zhu, Hengcheng and Wei, Lili and Terragni, Valerio and Liu, Yepang and Cheung, Shing-Chi and Wu, Jiarong and Sheng, Qin and Zhang, Bing and Song, Lihong},\n title = {StubCoder: Automated Generation and Repair of Stub Code for Mock Objects},\n year = {2023},\n publisher = {Association for Computing Machinery},\n address = {New York, NY, USA},\n issn = {1049-331X},\n url = {https://doi.org/10.1145/3617171},\n doi = {10.1145/3617171},\n abstract = {Mocking is an essential unit testing technique for isolating the class under test (CUT) from its dependencies. Developers often leverage mocking frameworks to develop stub code that specifies the behaviors of mock objects. However, developing and maintaining stub code is labor-intensive and error-prone. In this paper, we present StubCoder to automatically generate and repair stub code for regression testing. StubCoder implements a novel evolutionary algorithm that synthesizes test-passing stub code guided by the runtime behavior of test cases. We evaluated our proposed approach on 59 test cases from 13 open-source projects. Our evaluation results show that StubCoder can effectively generate stub code for incomplete test cases without stub code and repair obsolete test cases with broken stub code.},\n note = {Just Accepted},\n journal = {ACM Trans. Softw. Eng. 
Methodol.},\n month = {aug},\n keywords = {Test Generation and Repair, Genetic Programming, Software Testing, Mocking, Evolutionary Computation, Program Analysis}\n }","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"MockSniffer: Characterizing and Recommending Mocking Decisions for Unit Tests","date":"2020","authors":["Hengcheng Zhu","Lili Wei","Ming Wen","Yepang Liu","Shing-Chi Cheung","Qin Sheng","Cui Zhou"],"venue":"IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","tags":["Mocking","Unit Test"],"abstract":"In unit testing, mocking is popularly used to ease test effort, reduce test flakiness, and increase test coverage by replacing the actual dependencies with simple implementations. However, there are no clear criteria to determine which dependencies in a unit test should be mocked. Inappropriate mocking can have undesirable consequences: under-mocking could result in the inability to isolate the class under test (CUT) from its dependencies while over-mocking increases the developers' burden on maintaining the mocked objects and may lead to spurious test failures. According to existing work, various factors can determine whether a dependency should be mocked. As a result, mocking decisions are often difficult to make in practice. Studies on the evolution of mocked objects also showed that developers tend to change their mocking decisions: 17% of the studied mocked objects were introduced sometime after the test scripts were created and another 13% of the originally mocked objects eventually became unmocked. In this work, we are motivated to develop an automated technique to make mocking recommendations to facilitate unit testing. We studied 10,846 test scripts in four actively maintained open-source projects that use mocked objects, aiming to characterize the dependencies that are mocked in unit testing. 
Based on our observations on mocking practices, we designed and implemented a tool, MockSniffer, to identify and recommend mocks for unit tests. The tool is fully automated and requires only the CUT and its dependencies as input. It leverages machine learning techniques to make mocking recommendations by holistically considering multiple factors that can affect developers' mocking decisions. Our evaluation of MockSniffer on ten open-source projects showed that it outperformed three baseline approaches, and achieved good performance in two potential application scenarios.","paperUrl":"https://doi.org/10.1145/3324884.3416539","projectUrl":"https://github.com/henryhchchc/MockSniffer","bibtex":"@inproceedings{10.1145/3324884.3416539,\n author = {Zhu, Hengcheng and Wei, Lili and Wen, Ming and Liu, Yepang and Cheung, Shing-Chi and Sheng, Qin and Zhou, Cui},\n title = {MockSniffer: Characterizing and Recommending Mocking Decisions for Unit Tests},\n year = {2020},\n isbn = {9781450367684},\n publisher = {Association for Computing Machinery},\n address = {New York, NY, USA},\n url = {https://doi.org/10.1145/3324884.3416539},\n doi = {10.1145/3324884.3416539},\n abstract = {In unit testing, mocking is popularly used to ease test effort, reduce test flakiness, and increase test coverage by replacing the actual dependencies with simple implementations. However, there are no clear criteria to determine which dependencies in a unit test should be mocked. Inappropriate mocking can have undesirable consequences: under-mocking could result in the inability to isolate the class under test (CUT) from its dependencies while over-mocking increases the developers' burden on maintaining the mocked objects and may lead to spurious test failures. According to existing work, various factors can determine whether a dependency should be mocked. As a result, mocking decisions are often difficult to make in practice. 
Studies on the evolution of mocked objects also showed that developers tend to change their mocking decisions: 17% of the studied mocked objects were introduced sometime after the test scripts were created and another 13% of the originally mocked objects eventually became unmocked. In this work, we are motivated to develop an automated technique to make mocking recommendations to facilitate unit testing. We studied 10,846 test scripts in four actively maintained open-source projects that use mocked objects, aiming to characterize the dependencies that are mocked in unit testing. Based on our observations on mocking practices, we designed and implemented a tool, MockSniffer, to identify and recommend mocks for unit tests. The tool is fully automated and requires only the CUT and its dependencies as input. It leverages machine learning techniques to make mocking recommendations by holistically considering multiple factors that can affect developers' mocking decisions. Our evaluation of MockSniffer on ten open-source projects showed that it outperformed three baseline approaches, and achieved good performance in two potential application scenarios.},\n booktitle = {Proceedings of the 35th IEEE/ACM International Conference on Automated Software Engineering},\n pages = {436–447},\n numpages = {12},\n keywords = {unit testing, dependencies, recommendation system, mocking},\n location = {Virtual Event, Australia},\n series = {ASE '20}\n }","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"How Do Python Framework APIs Evolve? An Exploratory Study","date":"2020","authors":["Zhaoxu Zhang","Hengcheng Zhu","Ming Wen","Yida Tao","Yepang Liu","Yingfei Xiong"],"venue":"International Conference on Software Analysis, Evolution and Reengineering","venueShort":"SANER","tags":["Python","API Evolution"],"abstract":"Python is a popular dynamic programming language. In recent years, many frameworks implemented in Python have been widely used for data science and web development. 
Similar to frameworks in other languages, the APIs provided by Python frameworks often evolve, which would inevitably induce compatibility issues in client applications. While existing work has studied the evolution of frameworks in static programming languages such as Java, little is known on how Python framework APIs evolve and the characteristics of the compatibility issues induced by such evolution. To bridge this gap, we take a first look at the evolution of Python framework APIs and the resulting compatibility issues in client applications. We analyzed 288 releases of six popular Python frameworks from three different domains and 5,538 open-source projects built on these frameworks. We investigated the evolution patterns of Python framework APIs and found that they largely differ from those of Java framework APIs. We also investigated the compatibility issues in client applications and identified common strategies that developers adopt to fix these issues. Based on the empirical findings, we designed and implemented a tool, PYCOMPAT , to automatically detect compatibility issues caused by misusing evolved framework APIs in Python applications. Experiments on 10 real-world projects show that our tool can effectively detect compatibility issues of developers' concern.","paperUrl":"https://doi.org/10.1109/SANER48275.2020.9054800","projectUrl":"https://github.com/sqlab-sustech/PyCompat","bibtex":"@INPROCEEDINGS{9054800,\n author={Zhang, Zhaoxu and Zhu, Hengcheng and Wen, Ming and Tao, Yida and Liu, Yepang and Xiong, Yingfei},\n booktitle={2020 IEEE 27th International Conference on Software Analysis, Evolution and Reengineering (SANER)},\n title={How Do Python Framework APIs Evolve? 
An Exploratory Study},\n year={2020},\n volume={},\n number={},\n pages={81-92},\n doi={10.1109/SANER48275.2020.9054800}\n }","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Characterizing and Detecting Configuration Compatibility Issues in Android Apps","date":"2021-08-26","authors":["Huaxun Huang","Ming Wen","Lili Wei","Yepang Liu","Shing-Chi Cheung"],"venue":"Proceedings of the 36th IEEE/ACM International Conference on Automated Software Engineering (ASE '21)","venueShort":"ASE","tags":["Android","Compatibility Issues","XML Configurations"],"abstract":"\nXML configuration files are widely used in Android to define an app's user interface and essential runtime information such as system permissions. As Android evolves, it might introduce functional changes in the configuration environment, thus causing compatibility issues that manifest as inconsistent app behaviors at different API levels. Such issues can often induce software crashes and inconsistent look-and-feel when running at specific Android versions. Existing works incur plenty of false positive and false negative issue-detection rules by conducting trivial data-flow analysis while failing to model the XML tree hierarchies of the Android configuration files. Besides, little is known about how the changes in an Android framework can induce such compatibility issues. To bridge such gaps, we conducted a systematic study by analyzing 196 real-world issues collected from 43 popular apps. We identified common patterns of Android framework code changes that induce such configuration compatibility issues. Based on the findings, we propose ConfDroid that can automatically extract rules for detecting configuration compatibility issues. The intuition is to perform symbolic execution based on a model learned from the common code change patterns. Experiment results show that ConfDroid can successfully extract 282 valid issue-detection rules with a precision of 91.9%. 
Among them, 65 extracted rules can manifest issues that cannot be detected by the rules of state-of-the-art baselines. More importantly, 11 out of them have led to the detection of 107 reproducible configuration compatibility issues that the baselines cannot detect in 30 out of 316 real-world Android apps.\n ","projectUrl":"https://sites.google.com/view/confdroid","paperUrl":"https://castlelab.github.io/selected-publications/assets/ConfDroid-ASE21.pdf","arxivUrl":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"FlashSchema: Achieving High Quality XML Schemas with Powerful Inference Algorithms and Large-scale Schema Data.","date":"2020","authors":["Yeting LI"," Jialun CAO"," Haiming CHEN"," Tingjian GE"," Zhiwu XU"," Qiancheng PENG"],"venue":"International Conference on Data Engineering","venueShort":"ICDE","tags":["XML Schemas","Schemas Inference"],"abstract":"Getting high quality XML schemas to avoid or reduce application risks is an important problem in practice, for which some important aspects have yet to be addressed satisfactorily in existing work. In this paper, we propose a tool FlashSchema for high quality XML schema design, which supports both one-pass and interactive schema design and schema recommendation. To the best of our knowledge, no other existing tools support interactive schema design and schema recommendation. One salient feature of our work is the design of algorithms to infer k-occurrence interleaving regular expressions, which are not only more powerful in model capacity, but also more efficient. Additionally, such algorithms form the basis of our interactive schema design. The other feature is that, starting from large-scale schema data that we have harvested from the Web, we devise a new solution for type inference, as well as propose schema recommendation for schema design. 
Finally, we conduct a series of experiments on two XML datasets, comparing with 9 state-of-the-art algorithms and open-source tools in terms of running time, preciseness, and conciseness. Experimental results show that our work achieves the highest level of preciseness and conciseness within only a few seconds. Experimental results and examples also demonstrate the effectiveness of our type inference and schema recommendation methods.","paperUrl":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9101818","arxivUrl":null,"bibtex":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"FlashRegex: Deducing Anti-ReDoS Regexes from Examples.","date":"2020","authors":["Yeting LI"," Zhiwu XU"," Jialun CAO"," Haiming CHEN"," Tingjian GE"," Shing-Chi CHEUNG","Haoren ZHAO"],"venue":"International Conference on Automated Software Engineering","venueShort":"ASE","tags":["regular expression","Anti-ReDoS","program synthesis","program repair"],"abstract":"Regular expressions (regexes) are widely used in different fields of computer science such as programming languages, string processing and databases. However, existing tools for synthesizing or repairing regexes were not designed to be resilient to Regex Denial of Service (ReDoS) attacks. Specifically, if a regex has super-linear (SL) worst-case complexity, an attacker could provide carefully-crafted inputs to launch ReDoS attacks. Therefore, in this paper, we propose a programming-by-example framework, FlashRegex, for generating anti-ReDoS regexes by either synthesizing or repairing from given examples. It is the first framework that integrates regex synthesis and repair with the awareness of ReDoS-vulnerabilities. We present novel algorithms to deduce anti-ReDoS regexes by reducing the ambiguity of these regexes and by using Boolean Satisfiability (SAT) or Neighborhood Search (NS) techniques. We evaluate FlashRegex with five related state-of-the-art tools. 
The evaluation results show that our work can effectively and efficiently generate anti-ReDoS regexes from given examples, and also reveal that existing synthesis and repair tools have neglected ReDoS-vulnerabilities of regexes. Specifically, the existing synthesis and repair tools generated up to 394 ReDoS-vulnerable regex within few seconds to more than one hour, while FlashRegex generated no SL regex within around five seconds. Furthermore, the evaluation results on ReDoS-vulnerable regex repair also show that FlashRegex has better capability than existing repair tools and even human experts, achieving 4 more ReDoS-invulnerable regex after repair without trimming and resorting, highlighting the usefulness of FlashRegex in terms of the generality, automation and user-friendliness","paperUrl":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9286092","arxivUrl":null,"bibtex":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"TransRegex: Multi-modal Regular Expression Synthesis by Generate-and-Repair.","date":"2021","authors":["Yeting LI"," Shuaimin LI"," Zhiwu XU"," Jialun CAO"," Zixuan CHEN"," Yun HU"," Haiming CHEN","Shing-Chi CHEUNG"],"venue":"International Conference on Software Engineering","venueShort":"ICSE","tags":["regular expression","regular expression synthesis","regex synthesis","regex repair","programming by example","programming by natural language"],"abstract":"Since regular expressions (abbrev. regexes) are difficult to understand and compose, automatically generating regexes has been an important research problem. This paper introduces TransRegex, for automatically constructing regexes from both natural language descriptions and examples. To the best of our knowledge, TransRegex is the first to treat the NLP-and-example-based regex synthesis problem as the problem of NLP-based synthesis with regex repair. For this purpose, we present novel algorithms for both NLP-based synthesis and regex repair. 
We evaluate TransRegex with ten relevant state-of-the-art tools on three publicly available datasets. The evaluation results demonstrate that the accuracy of our TransRegex is 17.4%, 35.8% and 38.9% higher than that of NLP-based approaches on the three datasets, respectively. Furthermore, TransRegex can achieve higher accuracy than the state-of-the-art multi-modal techniques with 10% to 30% higher accuracy on all three datasets. The evaluation results also indicate that TransRegex utilizes natural language and examples in a more effective way.","paperUrl":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9401951","arxivUrl":null,"bibtex":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"ReDoSHunter: A Combined Static and Dynamic Approach for Regular Expression DoS Detection.","date":"2021","authors":["Yeting Li"," Zixuan Chen"," Jialun Cao"," Zhiwu Xu"," Qiancheng Peng"," Haiming Chen"," Liyuan Chen"," Shing-Chi Cheung"],"venue":"USENIX Security Symposium","venueShort":"USENIX Security","tags":["regular expression","Anti-ReDoS","ReDoS detection"],"abstract":"Regular expression Denial of Service (ReDoS) is a class of algorithmic complexity attacks where there exist inputs causing the typical backtracking-based matching algorithms to run in super-linear time. Considering the widespread use of regular expressions (regexes), ReDoS is a pervasive and serious threat. Thus, early detection of ReDoS-vulnerable regexes in software projects is vital. Existing detection approaches mainly fall into two categories: static and dynamic analysis. However, the static approaches detect more candidate vulnerabilities at the cost of low precision, while dynamic approaches guarantee the precision of detection yet compromise the recall. Detecting ReDoS at both high precision and high recall remains unsolved. Furthermore, we observed that a ReDoS-vulnerable regex often contains more than one vulnerability in practice. 
However, existing tools are incapable of detecting multiple vulnerabilities in one regex. To bridge the gaps, we propose ReDoSHunter, a ReDoS-vulnerable regex detection framework that can effectively pinpoint the multiple root causes of a vulnerable regex and generate the associated attack-triggering strings. Driven by our concluded five vulnerability patterns, ReDoSHunter can not only pinpoint the multiple vulnerabilities in one regex, but also assess the degree (i.e., exponential or polynomial) of vulnerabilities it detects. The experiment results show that ReDoSHunter is able to achieve 100% precision and 100% recall on three large-scale datasets with 37,651 regexes. Furthermore, apart from being able to detect 100% of the confirmed ReDoS CVEs (compared with 14.29%-60.00% achieved by existing works), ReDoSHunter also exposed 28 new ReDoS vulnerabilities in intensively-tested projects, resulting in 26 assigned CVEs and 2 fixed by developers.","paperUrl":null,"arxivUrl":null,"bibtex":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Programming by Example Made Easy","date":"2023","authors":["Jiarong Wu","Lili Wei","Yanyan Jiang","Shing-Chi Cheung","Luyao Ren","Chang Xu"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","tags":["Programming Synthesis","Programming by Example"],"abstract":"Programming by example (PBE) is an emerging programming paradigm that automatically synthesizes programs specified by user-provided input-output examples. Despite the convenience for end-users, implementing PBE tools often requires strong expertise in programming language and synthesis algorithms. Such a level of knowledge is uncommon among software developers. It greatly limits the broad adoption of PBE by the industry. 
To facilitate the adoption of PBE techniques, we propose a PBE framework called Bee, which leverages an “entity-action” model based on relational tables to ease PBE development for a wide but restrained range of domains. Implementing PBE tools with Bee only requires adapting domain-specific data entities and user actions to tables, with no need to design a domain-specific language or an efficient synthesis algorithm. The synthesis algorithm of Bee exploits bidirectional searching and constraint-solving techniques to address the challenge of value computation nested in table transformation. We evaluated Bee’s effectiveness on 64 PBE tasks from three different domains and usability with a human study of 12 participants. Evaluation results show that Bee is easier to learn and use than the state-of-the-art PBE framework, and the bidirectional algorithm achieves comparable performance to domain-specifically optimized synthesizers.","paperUrl":"https://dl.acm.org/doi/10.1145/3607185","projectUrl":"https://github.com/Sissel-Wu/Bee","bibtex":"@article{10.1145/3607185,\nauthor = {Wu, Jiarong and Wei, Lili and Jiang, Yanyan and Cheung, Shing-Chi and Ren, Luyao and Xu, Chang},\ntitle = {Programming by Example Made Easy},\nyear = {2023},\nissue_date = {January 2024},\npublisher = {Association for Computing Machinery},\naddress = {New York, NY, USA},\nvolume = {33},\nnumber = {1},\nissn = {1049-331X},\nurl = {https://doi.org/10.1145/3607185},\ndoi = {10.1145/3607185},\nabstract = {Programming by example (PBE) is an emerging programming paradigm that automatically synthesizes programs specified by user-provided input-output examples. Despite the convenience for end-users, implementing PBE tools often requires strong expertise in programming language and synthesis algorithms. Such a level of knowledge is uncommon among software developers. It greatly limits the broad adoption of PBE by the industry. 
To facilitate the adoption of PBE techniques, we propose a PBE framework called Bee, which leverages an “entity-action” model based on relational tables to ease PBE development for a wide but restrained range of domains. Implementing PBE tools with Bee only requires adapting domain-specific data entities and user actions to tables, with no need to design a domain-specific language or an efficient synthesis algorithm. The synthesis algorithm of Bee exploits bidirectional searching and constraint-solving techniques to address the challenge of value computation nested in table transformation. We evaluated Bee’s effectiveness on 64 PBE tasks from three different domains and usability with a human study of 12 participants. Evaluation results show that Bee is easier to learn and use than the state-of-the-art PBE framework, and the bidirectional algorithm achieves comparable performance to domain-specifically optimized synthesizers.},\njournal = {ACM Trans. Softw. Eng. Methodol.},\nmonth = {nov},\narticleno = {4},\nnumpages = {36},\nkeywords = {programming by example, Program synthesis}\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Can Systems Explain Permissions Better? Understanding Users' Misperceptions under Smartphone Runtime Permission Model","date":"2021-08-11","authors":["Bingyu Shen","Lili Wei","Chengcheng Xiang","Yudong Wu","Mingyao Shen","Yuanyuan Zhou","Xinxin Jin"],"venue":"the 30th USENIX Security Symposium, Vancouver, BC, Canada, Aug 11-13 2021","venueShort":"USENIX Security","tags":["Android","User study","Security"],"abstract":"\n Current smartphone operating systems enable users to manage permissions according to their personal preferences with a runtime permission model. 
Nonetheless, the systems provide very limited information when requesting permissions, making it difficult for users to understand permissions’ capabilities and potentially induced risks.\n In this paper, we first investigated to what extent current system-provided information can help users understand the scope of permissions and their potential risks. We took a mixed-methods approach by collecting real permission settings from 4,636 Android users, an interview study of 20 participants, and large-scale Internet surveys of 1559 users. Our study identified several common misunderstandings on the runtime permission model among users. We found that only a very small percentage (6.1%) of users can infer the scope of permission groups accurately from the system-provided information. This indicates that the information provided by current systems is far from sufficient.\n We thereby explored what extra information that systems can provide to help users make more informed permission decisions. By surveying users’ common concerns on apps’ permission requests, we identified five types of information (i.e., decision factors) that are helpful for users’ decisions. We further studied the impact and helpfulness of the factors to users’ permission decisions with both positive and negative messages. Our study shows that the background access factor helps most while the grant rate helps the least. 
Based on the findings, we provide suggestions for system designers to enhance future systems with more permission information.\n ","projectUrl":"https://ucsdopera.github.io/PermissionStudyUsenix21/dataset/","paperUrl":"http://cseweb.ucsd.edu/~byshen/files/sec21-shen.pdf","arxivUrl":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"Logging Practices with Mobile Analytics: An Empirical Study on Firebase","date":"2021-05-17","authors":["Julian Harty","Haonan Zhang","Lili Wei","Luca Pascarella","Maurício Aniche","Weiyi Shang"],"venue":"the 8th IEEE/ACM International Conference on Mobile Software Engineering and Systems, Madrid, Spain, May 17-19 2021","venueShort":"MOBILESoft","tags":["Android","Empirical study"],"abstract":"\n Software logs are of great value in both industrial and open-source projects. Mobile analytics logging enables developers to collect logs remotely from their apps running on end user devices at the cost of recording and transmitting logs across the Internet to a centralised infrastructure.\n This paper makes a first step in characterising logging practices with a widely adopted mobile analytics logging library, namely Firebase Analytics. We provide an empirical evaluation of the use of Firebase Analytics in 57 open-source Android applications by studying the evolution of code-bases to understand: a) the needs-in-common that push practitioners to adopt logging practices on mobile devices, and b) the differences in the ways developers use local and remote logging.\n Our results indicate mobile analytics logs are less pervasive and less maintained than traditional logging code. 
Based on our analysis, we believe logging using mobile analytics is more user centered compared to traditional logging, where the latter is mainly used to record information for debugging purposes.\n ","paperUrl":"https://arxiv.org/abs/2104.02513","arxivUrl":null,"bibtex":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Characterizing Transaction-Reverting Statements in Ethereum Smart Contracts","date":"2021-11-15","authors":["Lu Liu","Lili Wei","Wuqi Zhang","Ming Wen","Yepang Liu","Shing-Chi Cheung"],"venue":"The 36th IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","tags":["Blockchain","Smart Contracts","Empirical Study"],"awards":[],"abstract":"\nSmart contracts are programs stored on blockchains to execute transactions. \nWhen input constraints or security properties are violated at runtime, the transaction being executed by a smart contract needs to be reverted to avoid undesirable consequences.\nOn Ethereum, the most popular blockchain that supports smart contracts, developers can choose among three transaction-reverting statements (i.e., require, if...revert, and if...throw) to handle anomalous transactions.\nWhile these transaction-reverting statements are vital for preventing smart contracts from exhibiting abnormal behaviors or suffering malicious attacks, there is limited understanding of how they are used in practice. \nIn this work, we perform the first empirical study to characterize transaction-reverting statements in Ethereum smart contracts. 
\nWe measured the prevalence of these statements in 3,866 verified smart contracts from popular dapps and built a taxonomy of their purposes via manually analyzing 557 transaction-reverting statements.\nWe also compared template contracts and their corresponding custom contracts to understand how developers customize the use of transaction-reverting statements.\nFinally, we analyzed the security impact of transaction-reverting statements by removing them from smart contracts and comparing the mutated contracts against the original ones. \nOur study led to important findings.\nFor example, we found that transaction-reverting statements are commonly used to perform seven types of authority verifications or validity checks, and missing such statements may compromise the security of smart contracts.\nWe also found that current smart contract security analyzers cannot effectively handle transaction-reverting statements when detecting security vulnerabilities.\nOur findings can shed light on further research in the broad area of smart contract quality assurance and provide practical guidance to smart contract developers on the appropriate use of transaction-reverting statements. 
\n ","projectUrl":"https://github.com/transaction-reverting-statements/Characterizing-require-statement-in-Ethereum-Smart-Contract","arxivUrl":"https://arxiv.org/abs/2108.10799","paperUrl":"https://castlelab.github.io/selected-publications/assets/Characterizing_Transaction_Reverting_Statements-ASE21.pdf","slidesUrl":null,"bibtex":""},{"title":"Nyx: Detecting Exploitable Front-Running Vulnerabilities in Smart Contracts","date":"2024-05-20","authors":["Wuqi Zhang","Zhuo Zhang","Qingkai Shi","Lu Liu","Lili Wei","Yepang Liu","Xiangyu Zhang","Shing-Chi Cheung"],"venue":"The 45th IEEE Symposium on Security and Privacy","venueShort":"S&P","tags":["Blockchain","Front-running","Vulnerability","MEV","Program Analysis"],"abstract":"Smart contracts are susceptible to front-running attacks, in which malicious users leverage prior knowledge of upcoming transactions to execute attack transactions in advance and benefit their own portfolios. Existing contract analysis techniques raise a number of false positives and false negatives in that they simplistically treat data races in a contract as front-running vulnerabilities and can only analyze contracts in isolation. In this work, we formalize the definition of exploitable front-running vulnerabilities based on previous empirical studies on historical attacks, and present Nyx, a novel static analyzer to detect them. Nyx features a Datalog-based preprocessing procedure that efficiently and soundly prunes a large part of the search space, followed by a symbolic validation engine that precisely locates vulnerabilities with an SMT solver. We evaluate Nyx using a large dataset that comprises 513 real-world front-running attacks in smart contracts. Compared to six state-of-the-art techniques, Nyx surpasses them by 32.64%-90.19% in terms of recall and 2.89%-70.89% in terms of precision. 
Nyx has also identified four zero-days in real-world smart contracts.","projectUrl":null,"arxivUrl":null,"paperUrl":"https://castlelab.github.io/selected-publications/assets/Nyx-SP24.pdf","bibtex":null,"slidesUrl":null,"awards":[]},{"title":"Combatting Front-Running in Smart Contracts: Attack Mining, Benchmark Construction and Vulnerability Detector Evaluation","date":"2023-04-15","authors":["Wuqi Zhang","Lili Wei","Shing-Chi Cheung","Yepang Liu","Shuqing Li","Lu Liu","Michael R. Lyu"],"venue":"Transactions on Software Engineering","venueShort":"TSE","tags":["Blockchain","Front-running","Vulnerability","MEV","Benchmark"],"abstract":"\n Front-running attacks have been a major concern on the blockchain. Attackers launch front-running attacks by inserting additional transactions before upcoming victim transactions to manipulate victim transaction executions and make profits. Recent studies have shown that front-running attacks are prevalent on the Ethereum blockchain and have caused millions of US dollars loss. Vulnerable smart contracts, blockchain programs invoked by transactions, are held responsible for front-running attacks. Although techniques to detect front-running vulnerabilities have been proposed, their performance on real-world vulnerable contracts is unclear. There is no large-scale benchmark based on real attacks to evaluate their capabilities. This motivates us to build a benchmark consisting of 513 real-world attacks with vulnerable code labeled in 235 distinct smart contracts. We propose automated techniques to effectively collect real-world attacks and localize the corresponding vulnerable code at scale. Our experiments show that our approaches are effective, achieving higher recall in finding real attacks and higher precision in pinpointing vulnerabilities compared to the existing techniques. 
The evaluation of seven state-of-the-art vulnerability detection techniques on the benchmark reveals their inadequacy in detecting front-running vulnerabilities, with a low recall of at most 6.04%. Our further analysis identifies four common limitations in existing techniques: lack of support for inter-contract analysis, inefficient constraint solving for cryptographic operations, improper vulnerability patterns, and lack of token support.\n ","projectUrl":"https://github.com/Troublor/erebus-redgiant","arxivUrl":null,"paperUrl":"https://ieeexplore.ieee.org/document/10108045","bibtex":null,"slidesUrl":null,"awards":[]},{"title":"ÐArcher: Detecting On-Chain-Off-Chain Synchronization Bugs in Decentralized Applications","date":"2021-08-23","authors":["Wuqi Zhang","Lili Wei","Shuqing Li","Yepang Liu","Shing-Chi Cheung"],"venue":"Proceedings of the 29th ACM Joint European SoftwareEngineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE ’21)","venueShort":"ESEC/FSE","tags":["Decentralized Applications","Testing","Blockchain"],"abstract":"\n Since the emergence of Ethereum, blockchain-based decentralized applications (DApps) have become increasingly popular and important. To balance the security, performance, and costs, a DApp typically consists of two layers: an on-chain layer to execute transactions and store crucial data on blockchain, and an off-chain layer to interact with users. A DApp needs to proactively synchronize its off-chain layer with the on-chain layer, otherwise, the inconsistent data in the off-chain layer could mislead users and cause undesirable consequences, e.g., loss of transaction fees. However, transactions sent to blockchain are not guaranteed to be executed and could even be reversed after execution due to chain reorganization. 
Such non-determinism in the transaction execution is unique to blockchain and DApp developers may fail to perform the on-chain-off-chain synchronization accurately due to their unfamiliarity of the complex transaction lifecycle.\n In this work, we investigate the challenges of synchronizing on-chain and off-chain data in Ethereum-based DApps. We present two types of bugs that could result in inconsistencies between the on-chain and off-chain layers. To help detect such on-chain-off-chain synchronization bugs, we introduce a state transition model to guide the testing of DApps, and propose two effective oracles to facilitate the automatic identification of bugs. We build the first testing framework, ÐArcher, to detect on-chain-off-chain synchronization bugs in DApps. We have evaluated ÐArcher on 11 popular real-world DApps. ÐArcher achieves high precision (99.3%), recall (87.6%), and accuracy (89.4%) in bug detection and significantly outperforms the baseline methods. It has found 15 real bugs in the 11 DApps. So far, six of the 15 bugs have been confirmed by the developers and three have been fixed. These promising results demonstrate the usefulness of ÐArcher.\n ","projectUrl":"https://github.com/Troublor/darcher","arxivUrl":"https://arxiv.org/pdf/2106.09440.pdf","paperUrl":"https://castlelab.github.io/selected-publications/assets/DArcher-FSE21.pdf","bibtex":"@inproceedings{10.1145/3468264.3468546,\n author = {Zhang, Wuqi and Wei, Lili and Li, Shuqing and Liu, Yepang and Cheung, Shing-Chi},\n title = {DH{}Archer: Detecting on-Chain-off-Chain Synchronization Bugs in Decentralized Applications},\n year = {2021},\n isbn = {9781450385626},\n publisher = {Association for Computing Machinery},\n address = {New York, NY, USA},\n url = {https://doi.org/10.1145/3468264.3468546},\n doi = {10.1145/3468264.3468546},\n abstract = {Since the emergence of Ethereum, blockchain-based decentralized applications (DApps)\n have become increasingly popular and important. 
To balance the security, performance,\n and costs, a DApp typically consists of two layers: an on-chain layer to execute transactions\n and store crucial data on the blockchain and an off-chain layer to interact with users.\n A DApp needs to synchronize its off-chain layer with the on-chain layer proactively.\n Otherwise, the inconsistent data in the off-chain layer could mislead users and cause\n undesirable consequences, e.g., loss of transaction fees. However, transactions sent\n to the blockchain are not guaranteed to be executed and could even be reversed after\n execution due to chain reorganization. Such non-determinism in the transaction execution\n is unique to blockchain. DApp developers may fail to perform the on-chain-off-chain\n synchronization accurately due to their lack of familiarity with the complex transaction\n lifecycle. In this work, we investigate the challenges of synchronizing on-chain and\n off-chain data in Ethereum-based DApps. We present two types of bugs that could result\n in inconsistencies between the on-chain and off-chain layers. To help detect such\n on-chain-off-chain synchronization bugs, we introduce a state transition model to\n guide the testing of DApps and propose two effective oracles to facilitate the automatic\n identification of bugs. We build the first testing framework, DH{}Archer, to detect on-chain-off-chain\n synchronization bugs in DApps. We have evaluated DH{}Archer on 11 popular real-world\n DApps. DH{}Archer achieves high precision (99.3%), recall (87.6%), and accuracy (89.4%)\n in bug detection and significantly outperforms the baseline methods. It has found\n 15 real bugs in the 11 DApps. So far, six of the 15 bugs have been confirmed by the\n developers, and three have been fixed. 
These promising results demonstrate the usefulness\n of DH{}Archer.},\n booktitle = {Proceedings of the 29th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering},\n pages = {553–565},\n numpages = {13},\n keywords = {Software testing, DApps, Decentralized applications, Blockchain},\n location = {Athens, Greece},\n series = {ESEC/FSE 2021}\n }","slidesUrl":null,"awards":[]},{"title":"Will Dependency Conflicts Affect My Program's Semantics?","date":"2021","authors":["Ying Wang","Rongxin Wu","Chao Wang","Ming Wen","Yepang Liu","Shing-Chi Cheung","Hai Yu","Chang Xu","Zhiliang Zhu"],"venue":"IEEE Transactions on Software Engineering","venueShort":"TSE","tags":["Third-Party Libraries","Java","Dependency Management"],"abstract":"\n Java projects are often built on top of various third-party libraries. If multiple versions of a library exist on the classpath, JVM will only load one version and shadow the others, which we refer to as dependency conflicts. This would give rise to semantic conflict (SC) issues, if the library APIs referenced by a project have identical method signatures but inconsistent semantics across the loaded and shadowed versions of libraries. SC issues are difficult for developers to diagnose in practice, since understanding them typically requires domain knowledge. Although adapting the existing test generation technique for dependency conflict issues, Riddle, to detect SC issues is feasible, its effectiveness is greatly compromised. This is mainly because Riddle randomly generates test inputs, while the SC issues typically require specific arguments in the tests to be exposed. To address that, we conducted an empirical study of 316 real SC issues to understand the characteristics of such specific arguments in the test cases that can capture the SC issues. 
Inspired by our empirical findings, we propose an automated testing technique Sensor, which synthesizes test cases using ingredients from the project under test to trigger inconsistent behaviors of the APIs with the same signatures in conflicting library versions. Our evaluation results show that Sensor is effective and useful: it achieved a Precision of 0.898 and a Recall of 0.725 on open-source projects and a Precision of 0.821 on industrial projects; it detected 306 semantic conflict issues in 50 projects, 70.4% of which had been confirmed as real bugs, and 84.2% of the confirmed issues have been fixed quickly.\n ","projectUrl":"https://sensordc.github.io/","paperUrl":"https://ieeexplore.ieee.org/document/9350237","slidesUrl":null,"bibtex":"@article{YingSensor,\n author = {Ying Wang and\n Rongxin Wu and\n Chao Wang and\n Ming Wen and\n Yepang Liu and\n Shing{-}Chi Cheung\n Hai Yu and\n Chang Xu\n and Zhiliang Zhu},\n title = {Will Dependency Conflicts Affect My Program's Semantics?},\n journal = {{IEEE} Transactions on Software Engineering},\n volume = {99},\n number = {1},\n pages = {1--22},\n year = {2021},\n url = {https://ieeexplore.ieee.org/document/9350237},\n doi = {10.1109/TSE.2021.3057767},\n timestamp = {Fri, 08 February 2021 21:56:08 +0200},\n biburl = {https://dblp.org/rec/journals/tsc/WangHXZC20.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n }","arxivUrl":null,"awards":[]},{"title":"Hero: On the Chaos When PATH Meets Modules","date":"2021","authors":["Ying Wang","Liang Qiao","Chang Xu","Yepang Liu","Shing-Chi Cheung","Na Meng","Hai Yu","Zhiliang Zhu"],"venue":"Proceedings of the 43rd International Conference on Software Engineering (ICSE ’21)","venueShort":"ICSE","tags":["Third-Party Libraries","Golang","Dependency Management"],"awards":["Distinguished Paper"],"abstract":"\n Ever since its first release in 2009, the Go programming language (Golang) has been well received by software communities. 
A major reason for its success is the powerful support of library-based development, where a Golang project can be conveniently built on top of other projects by referencing them as libraries. As Golang evolves, it recommends the use of a new library-referencing mode to overcome the limitations of the original one. While these two library modes are incompatible, both are supported by the Golang ecosystem. The heterogeneous use of library-referencing modes across Golang projects has caused numerous dependency management (DM) issues, incurring reference inconsistencies and even build failures. Motivated by the problem, we conducted an empirical study to characterize the DM issues, understand their root causes, and examine their fixing solutions. Based on our findings, we developed Hero, an automated technique to detect DM issues and suggest proper fixing solutions. We applied Hero to 19,000 popular Golang projects. The results showed that Hero achieved a high detection rate of 98.5% on a DM issue benchmark and found 2,422 new DM issues in 2,356 popular Golang projects. We reported 280 issues, among which 181 (64.6%) issues have been confirmed, and 160 of them (88.4%) have been fixed or are under fixing. 
Almost all the fixes have adopted our fixing suggestions.\n ","projectUrl":"http://www.hero-go.com/","paperUrl":"https://conf.researchr.org/details/icse-2021/icse-2021-papers/16/Hero-On-the-Chaos-When-PATH-Meets-Modules","slidesUrl":null,"bibtex":"@inproceedings{YingHero,\n author = {Ying Wang and\n Liang Qiao and\n Chang Xu and\n Yepang Liu and\n Shing{-}Chi Cheung and\n Na Meng and\n Hai Yu and \n Zhiliang Zhu},\n title = {Hero: On the Chaos When PATH Meets Modules},\n booktitle = {{ICSE} '21: 43rd International Conference on Software Engineering, Virtual\n Event, Spain, May 23-29, 2021},\n pages = {99--111},\n publisher = {{IEEE}},\n year = {2021},\n url = {https://ieeexplore.ieee.org/document/9401974},\n doi = {10.1109/ICSE43902.2021.00022},\n timestamp = {22-30 May 2021 10:58:23 +0100},\n biburl = {https://dblp.org/rec/conf/sigsoft/ZhangRC0C020.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null},{"title":"Watchman: Monitoring Dependency Conflicts for Python Library Ecosystem","date":"2020","authors":["Ying Wang","Ming Wen","Yepang Liu","Yibo Wang","Zhenming Li","Chao Wang","Shing-Chi Cheung","Hai Yu","Chang Xu","Zhiliang Zhu"],"venue":"Proceedings of the 42rd International Conference on Software Engineering (ICSE ’20)","venueShort":"ICSE","tags":["Third-Party Libraries","Python","Dependency Management"],"abstract":"\n The PyPI ecosystem has indexed millions of Python libraries to allow developers to automatically download and install dependencies of their projects based on the specified version constraints. Despite the convenience brought by automation, version constraints in Python projects can easily conflict, resulting in build failures. We refer to such conflicts as Dependency Conflict (DC) issues. Although DC issues are common in Python projects, developers lack tool support to gain a comprehensive knowledge for diagnosing the root causes of these issues. 
In this paper, we conducted an empirical study on 235 real-world DC issues. We studied the manifestation patterns and fixing strategies of these issues and found several key factors that can lead to DC issues and their regressions. Based on our findings, we designed and implemented Watchman, a technique to continuously monitor dependency conflicts for the PyPI ecosystem. In our evaluation, Watchman analyzed PyPI snapshots between 11 Jul 2019 and 16 Aug 2019, and found 117 potential DC issues. We reported these issues to the developers of the corresponding projects. So far, 63 issues have been confirmed, 38 of which have been quickly fixed by applying our suggested patches.\n ","projectUrl":"http://www.watchman-pypi.com/","paperUrl":"https://dl.acm.org/doi/abs/10.1145/3377811.3380426","slidesUrl":"https://blog.acolyer.org/2020/09/21/watchman/","bibtex":"@inproceedings{YingWatchman,\n author = {Ying Wang and\n Ming Wen and\n Yepang Liu and\n Yibo Wang and\n Zhenming Li and\n Chao Wang and\n Shing{-}Chi Cheung and\n Hai Yu and\n Chang Xu and\n Zhiliang Zhu\n },\n title = {Watchman: Monitoring Dependency Conflicts for Python Library Ecosystem},\n booktitle = {{ICSE} '20: 42nd International Conference on Software Engineering, Virtual\n Event, Spain, July 6-11, 2020},\n pages = {125--135},\n publisher = {{ACM}},\n year = {2020},\n url = {https://dl.acm.org/doi/abs/10.1145/3377811.3380426},\n doi = {10.1145/3377811.3380426},\n timestamp = {Mon, 27 July 2020 16:42:27 +0200},\n biburl = {https://dblp.uni-trier.de/db/conf/icse/icse2020.html},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"awards":[]},{"title":"To What Extent Do DNN-based Image Classification Models Make Unreliable Inferences?","date":"2021","authors":["Yongqiang Tian","Shiqing Ma","Ming Wen","Yepang Liu","Shing-Chi Cheung","Xiangyu Zhang"],"venue":"Empirical Software Engineering","venueShort":"EMSE","tags":["Testing","DNN model","Metamorphic 
Testing"],"abstract":"Deep Neural Network (DNN) models are widely used for image classification. While they offer high performance in terms of accuracy, researchers are concerned about if these models inappropriately make inferences using features irrelevant to the target object in a given image. To address this concern, we propose a metamorphic testing approach that assesses if a given inference is made based on irrelevant features. Specifically, we propose two metamorphic relations (MRs) to detect such unreliable inferences. These relations expect (a) the classification results with different labels or the same labels but less certainty from models after corrupting the relevant features of images, and (b) expect the classification results with the same labels after corrupting irrelevant features. The inferences that violate the metamorphic relations are regarded as unreliable inferences.\nOur evaluation demonstrated that our approach can effectively identify unreliable inferences for single-label classification models with an average precision of 64.1% and 96.4% for the two MRs, respectively. As for multi-label classification models, the corresponding precision for MR-1 and MR-2 is 78.2% and 86.5%, respectively. Further, we conducted an empirical study to understand the problem of unreliable inferences in practice. Specifically, we applied our approach to 18 pre-trained single-label image classification models and 3 multi-label classification models, and then examined their inferences on the ImageNet and COCO datasets. We found that unreliable inferences are pervasive. Specifically, for each model, more than thousands of correct classifications are actually made using irrelevant features. Next, we investigated the effect of such pervasive unreliable inferences, and found that they can cause significant degradation of a model's overall accuracy. After including these unreliable inferences from the test set, the model's accuracy can be significantly changed. 
Therefore, we recommend that developers should pay more attention to these unreliable inferences during the model evaluations. We also explored the correlation between model accuracy and the size of unreliable inferences. We found the inferences of the input with smaller objects are easier to be unreliable. Lastly, we found that the current model training methodologies can guide the models to learn object-relevant features to certain extent, but may not necessarily prevent the model from making unreliable inferences. We encourage the community to propose more effective training methodologies to address this issue.\n","projectUrl":"https://github.com/yqtianust/PaperUnreliableInference","paperUrl":"https://doi.org/10.1007/s10664-021-09985-1","bibtex":"@Article{Tian2021,\n author={Tian, Yongqiang\n and Ma, Shiqing\n and Wen, Ming\n and Liu, Yepang\n and Cheung, Shing-Chi\n and Zhang, Xiangyu},\n title={To what extent do DNN-based image classification models make unreliable inferences?},\n journal={Empirical Software Engineering},\n year={2021},\n month={Jun},\n day={18},\n volume={26},\n number={5},\n pages={84},\n abstract={Deep Neural Network (DNN) models are widely used for image classification. While they offer high performance in terms of accuracy, researchers are concerned about if these models inappropriately make inferences using features irrelevant to the target object in a given image. To address this concern, we propose a metamorphic testing approach that assesses if a given inference is made based on irrelevant features. Specifically, we propose two metamorphic relations (MRs) to detect such unreliable inferences. These relations expect (a) the classification results with different labels or the same labels but less certainty from models after corrupting the relevant features of images, and (b) the classification results with the same labels after corrupting irrelevant features. 
The inferences that violate the metamorphic relations are regarded as unreliable inferences. Our evaluation demonstrated that our approach can effectively identify unreliable inferences for single-label classification models with an average precision of 64.1{\\%} and 96.4{\\%} for the two MRs, respectively. As for multi-label classification models, the corresponding precision for MR-1 and MR-2 is 78.2{\\%} and 86.5{\\%}, respectively. Further, we conducted an empirical study to understand the problem of unreliable inferences in practice. Specifically, we applied our approach to 18 pre-trained single-label image classification models and 3 multi-label classification models, and then examined their inferences on the ImageNet and COCO datasets. We found that unreliable inferences are pervasive. Specifically, for each model, more than thousands of correct classifications are actually made using irrelevant features. Next, we investigated the effect of such pervasive unreliable inferences, and found that they can cause significant degradation of a model's overall accuracy. After including these unreliable inferences from the test set, the model's accuracy can be significantly changed. Therefore, we recommend that developers should pay more attention to these unreliable inferences during the model evaluations. We also explored the correlation between model accuracy and the size of unreliable inferences. We found the inferences of the input with smaller objects are easier to be unreliable. Lastly, we found that the current model training methodologies can guide the models to learn object-relevant features to certain extent, but may not necessarily prevent the model from making unreliable inferences. 
We encourage the community to propose more effective training methodologies to address this issue.},\n issn={1573-7616},\n doi={10.1007/s10664-021-09985-1},\n url={https://doi.org/10.1007/s10664-021-09985-1}\n }","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"EvalDNN: a toolbox for evaluating deep neural network models","date":"2020","authors":["Yongqiang Tian","Zhihua Zeng","Ming Wen","Yepang Liu","Tzu-yang Kuo","Shing-Chi Cheung"],"venue":"42nd International Conference on Software Engineering, Demo","venueShort":"ICSE Demo","tags":["Testing","DNN model","Benchmark"],"projectUrl":"https://github.com/yqtianust/EvalDNN","paperUrl":"https://doi.org/10.1145/3377812.3382133","arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"A Comprehensive Study of Deep Learning Compiler Bugs","date":"2021","authors":["Qingchao Shen","Haoyang Ma","Junjie Chen","Yongqiang Tian","Shing-Chi Cheung","Xiang Chen"],"venue":"Proceedings of the 29th ACM Joint European SoftwareEngineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE ’21)","venueShort":"ESEC/FSE","tags":["DL Compiler","Empirical Study"],"projectUrl":"https://github.com/ShenQingchao/DLCstudy","paperUrl":null,"arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"AdvDoor: Adversarial Backdoor Attack of Deep Learning System","date":"2021","authors":["Quan Zhang","Yifeng Ding","Yongqiang Tian","Jianmin Guo","Min Yuan","Yu Jiang"],"venue":"ACM SIGSOFT International Symposium on Software Testing and Analysis","venueShort":"ISSTA","tags":["DNN model","Backdoor Attack"],"projectUrl":"https://github.com/AdvDoor/AdvDoor","paperUrl":null,"arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"Finding Deviated Behaviors of the Compressed DNN Models for Image Classifications.","date":"2023","authors":["Yongqiang Tian","Wuqi Zhang","Ming Wen","Shing-Chi Cheung","Chengnian Sun","Shiqing Ma","Yu 
Jiang"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","tags":["DNN model"],"projectUrl":"https://dl.acm.org/doi/abs/10.1145/3583564","paperUrl":null,"arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"Revisiting the Evaluation of Deep Learning-Based Compiler Testing.","date":"2023","authors":["Yongqiang Tian","Zhenyang Xu","Yiwen Dong","Chengnian Sun","Shing-Chi Cheung"],"venue":"The 32nd International Joint Conference on Artificial Intelligence","venueShort":"IJCAI","tags":["Compiler testing"],"projectUrl":null,"paperUrl":null,"arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"On the Caching Schemes to Speed Up Program Reduction.","date":"2023","authors":["Yongqiang Tian","Xueyan Zhang","Yiwen Dong","Zhenyang Xu","Mengxiao Zhang","Yu Jiang","Shing-Chi Cheung","Chengnian Sun"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","tags":["Program Reduction"],"projectUrl":"https://github.com/uw-pluverse/perses/blob/master/doc/RCC.md","paperUrl":null,"arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]}] \ No newline at end of file +[{"title":"Managing Software Supply Chains - Theory and Practice.","date":"2025","authors":["Ying Wang","Shing-Chi Cheung","Hai Yu","Zhiliang Zhu"],"venue":"","venueShort":"","abstract":"This book offers a comprehensive literature review on software supply chains, studies on dependency hell issues, and a toolkit and datasets to combat them","tags":["Dependency Management","Third-Party Libraries","Empirical study"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1007/978-981-96-1797-5","bibtex":"@book{DBLP:books/sp/WangCYZ25,\n author = {Ying Wang and\n Shing{-}Chi Cheung and\n Hai Yu and\n Zhiliang Zhu},\n title = {Managing Software Supply Chains - Theory and Practice},\n publisher = {Springer},\n year = {2025},\n url = {https://doi.org/10.1007/978-981-96-1797-5},\n doi = 
{10.1007/978-981-96-1797-5},\n isbn = {978-981-96-1796-8},\n timestamp = {Wed, 09 Apr 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/books/sp/WangCYZ25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"A study on prompt design, advantages and limitations of ChatGPT for deep learning program repair.","date":"2025","authors":["Jialun Cao","Meiziniu Li","Ming Wen","Shing-Chi Cheung"],"venue":"Automated Software Engineering","venueShort":"ASEJ","abstract":"The emergence of large language models (LLMs) such as ChatGPT has revolutionized many fields. In particular, recent advances in LLMs have triggered various studies examining the use of these models for software development tasks, such as program repair, code understanding, and code generation. Prior studies have shown the capability of ChatGPT in repairing conventional programs. However, debugging deep learning (DL) programs poses unique challenges since the decision logic is not directly encoded in the source code. This requires LLMs to not only parse the source code syntactically but also understand the intention of DL programs. Therefore, ChatGPT’s capability in repairing DL programs remains unknown. To fill this gap, our study aims to answer three research questions: (1) Can ChatGPT debug DL programs effectively? (2) How can ChatGPT’s repair performance be improved by prompting? (3) In which way can dialogue help facilitate the repair? Our study analyzes the typical information that is useful for prompt design and suggests enhanced prompt templates that are more efficient for repairing DL programs. On top of them, we summarize the dual perspectives (i.e., advantages and disadvantages) of ChatGPT’s ability, such as its handling of API misuse and recommendation, and its shortcomings in identifying default parameters. 
Our findings indicate that ChatGPT has the potential to repair DL programs effectively and that prompt engineering and dialogue can further improve its performance by providing more code intention. We also identified the key intentions that can enhance ChatGPT’s program repairing capability.","tags":["LLM","Program Repair","Deep Learning","Empirical study"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1007/s10515-025-00492-x","bibtex":"@article{DBLP:journals/ase/CaoLWC25,\n author = {Jialun Cao and\n Meiziniu Li and\n Ming Wen and\n Shing{-}Chi Cheung},\n title = {A study on prompt design, advantages and limitations of ChatGPT for\n deep learning program repair},\n journal = {Autom. Softw. Eng.},\n volume = {32},\n number = {1},\n pages = {30},\n year = {2025},\n url = {https://doi.org/10.1007/s10515-025-00492-x},\n doi = {10.1007/S10515-025-00492-X},\n timestamp = {Sun, 15 Jun 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/ase/CaoLWC25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"How far are app secrets from being stolen? a case study on android.","date":"2025","authors":["Lili Wei","Heqing Huang","Shing-Chi Cheung","Kevin Li"],"venue":"Empirical Software Engineering","venueShort":"EMSE","abstract":"Android apps can hold secret strings of themselves such as cloud service credentials or encryption keys. Leakage of such secret strings can induce unprecedented consequences like monetary losses or leakage of user private information. In practice, various security issues were reported because many apps failed to protect their secrets. However, litte is known about the types, usages, exploitability, and consequences of app secret leakage issues. While a large body of literature has been devoted to studying user private information leakage, there is no systematic study characterizing app secret leakage issues. How far are Android app secrets from being stolen? 
To bridge this gap, we conducted the first systematic study to characterize app secret leakage issues in Android apps based on 575 potential app secrets sampled from 14,665 popular Android apps on Google Play. We summarized the common categories of leaked app secrets, assessed their security impacts and disclosed app bad practices in storing app secrets. We devised a text mining strategy using regular expressions and demonstrated that numerous app secrets can be easily stolen, even from the highly popular Android apps on Google. In a follow-up study, we harvested 3,711 distinct exploitable app secrets through automatic analysis. Our findings highlight the prevalence of this problem and call for greater attention to app secret protection.","tags":["Android","Security","Empirical study","regular expression"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1007/s10664-024-10607-9","bibtex":"@article{DBLP:journals/ese/WeiHCL25,\n author = {Lili Wei and\n Heqing Huang and\n Shing{-}Chi Cheung and\n Kevin Li},\n title = {How far are app secrets from being stolen? a case study on android},\n journal = {Empir. Softw. Eng.},\n volume = {30},\n number = {3},\n pages = {90},\n year = {2025},\n url = {https://doi.org/10.1007/s10664-024-10607-9},\n doi = {10.1007/S10664-024-10607-9},\n timestamp = {Fri, 16 May 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/ese/WeiHCL25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"On state reverting in solidity smart contracts: Developer practices, fault categorization, and tool evaluation.","date":"2025","authors":["Lu Liu","Lili Wei","Wuqi Zhang","Shuqing Li","Yifan Zhou","Yepang Liu","Shing-Chi Cheung","Michael R. Lyu"],"venue":"Empirical Software Engineering","venueShort":"EMSE","abstract":"Smart contracts are computer programs deployed on blockchains to facilitate transactions. 
A critical aspect of smart contract security is the use of state-reverting statements (e.g., require, if...revert, if...throw). These statements protect transactions from abnormal behaviors or malicious attacks by reverting a contract to its previous state when certain input constraints or security properties are violated. While essential, the correct use of these state-reverting (SR) statements is nontrivial. Improper use can lead to security vulnerabilities, resulting in substantial financial losses or other severe consequences. It is, therefore, highly important to understand developers’ practices of state reverting in smart contracts and the common mistakes they make. To achieve this goal, we conduct the first comprehensive empirical study on the use of SR statements and their related faults in Solidity smart contracts. First, we analyze the prevalence and purposes of SR statements in 21,414 verified contracts from popular decentralized applications (dapps) and manually examine 381 SR statements, leading to a taxonomy of their uses. Second, we collect 320 real-world state-reverting faults (SR faults) from open-source projects on GitHub and audit reports on Code4rena. We categorize the SR faults into 17 types and summarize 12 distinct fixing strategies. This knowledge can help researchers and practitioners to better understand the common usages of SR statements and learn how to prevent or cope with SR faults. Lastly, the variety of SR fault types and the presence of high-risk issues highlight the need for automated tools to identify and mitigate these faults. This further motivates us to assess the SR fault detection performance of state-of-the-art security analyzers, with the aim of understanding their capability and identifying their deficiencies. 
Via evaluating 12 representative tools on a benchmark comprising 243 contracts with six types of SR faults and the corresponding patched versions, we observe that existing tools exhibit limited capabilities in detecting SR faults (the average detection rate is 14.4%). This result underscores the need for more advanced security analysis tools specifically tailored for SR faults. To facilitate the development of such tools, we further provide a comprehensive analysis of three common limitations of existing tools.","tags":["Smart Contracts","Empirical study","Security","Vulnerability"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1007/s10664-025-10685-3","bibtex":"@article{DBLP:journals/ese/LiuWZLZLCL25,\n author = {Lu Liu and\n Lili Wei and\n Wuqi Zhang and\n Shuqing Li and\n Yifan Zhou and\n Yepang Liu and\n Shing{-}Chi Cheung and\n Michael R. Lyu},\n title = {On state reverting in solidity smart contracts: Developer practices,\n fault categorization, and tool evaluation},\n journal = {Empir. Softw. Eng.},\n volume = {30},\n number = {5},\n pages = {141},\n year = {2025},\n url = {https://doi.org/10.1007/s10664-025-10685-3},\n doi = {10.1007/S10664-025-10685-3},\n timestamp = {Tue, 05 Aug 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/ese/LiuWZLZLCL25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Understanding and Characterizing Mock Assertions in Unit Tests.","date":"2025","authors":["Hengcheng Zhu","Valerio Terragni","Lili Wei","Shing-Chi Cheung","Jiarong Wu","Yepang Liu"],"venue":"Proceedings of the ACM on Software Engineering","venueShort":"FSE","abstract":"Mock assertions provide developers with a powerful means to validate program behaviors that are unobservable to test assertions. Despite their significance, they are rarely considered by automated test generation techniques. 
Effective generation of mock assertions requires understanding how they are used in practice. Although previous studies highlighted the importance of mock assertions, none provide insight into their usages. To bridge this gap, we conducted the first empirical study on mock assertions, examining their adoption, the characteristics of the verified method invocations, and their effectiveness in fault detection. Our analysis of 4,652 test cases from 11 popular Java projects reveals that mock assertions are mostly applied to validating specific kinds of method calls, such as those interacting with external resources and those reflecting whether a certain code path was traversed in systems under test. Additionally, we find that mock assertions complement traditional test assertions by ensuring the desired side effects have been produced, validating control flow logic, and checking internal computation results. Our findings contribute to a better understanding of mock assertion usages and provide a foundation for future related research such as automated test generation that support mock assertions.","tags":["Mocking","Empirical study","Unit Test","Java"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3715741","bibtex":"@article{DBLP:journals/pacmse/ZhuTWCWL25,\n author = {Hengcheng Zhu and\n Valerio Terragni and\n Lili Wei and\n Shing{-}Chi Cheung and\n Jiarong Wu and\n Yepang Liu},\n title = {Understanding and Characterizing Mock Assertions in Unit Tests},\n journal = {Proc. {ACM} Softw. 
Eng.},\n volume = {2},\n number = {{FSE}},\n pages = {554--575},\n year = {2025},\n url = {https://doi.org/10.1145/3715741},\n doi = {10.1145/3715741},\n timestamp = {Sat, 06 Sep 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/pacmse/ZhuTWCWL25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"SemBIC: Semantic-Aware Identification of Bug-Inducing Commits.","date":"2025","authors":["Xiao Chen","Hengcheng Zhu","Jialun Cao","Ming Wen","Shing-Chi Cheung"],"venue":"Proceedings of the ACM on Software Engineering","venueShort":"FSE","abstract":"Debugging can be much facilitated if one can identify the evolution commit that introduced the bug leading to a detected failure (aka. bug-inducing commit, BIC). Although one may, in theory, locate BICs by executing the detected failing test on various historical commit versions, it is impractical when the test cannot be executed on some of those versions. On the other hand, existing static techniques often assume the availability of additional information such as patches and bug reports, or the applicability of predefined heuristics like commit chronology. However, these approaches are ineffective when such assumptions do not hold, which are often the case in practice. To address these limitations, we propose SEMBIC to identify the BIC of a bug by statically tracking the semantic changes in the execution path prescribed by the failing test across successive historical commit versions. Our insight is that the greater the semantic changes a commit introduces concerning the failing execution path of a target bug, the more likely it is to be the BIC. To distill semantic changes relevant to the failure, we focus on three fine-grained semantic properties. We evaluate the performance of SEMBIC on a benchmark containing 199 real-world bugs from 12 open-source projects. 
We found that SEMBIC can identify BICs with high accuracy – it ranks the BIC as top 1 for 88 out of 199 bugs, and achieves an MRR of 0.520, outperforming the state-of-the-art technique by 29.4% and 13.6%, respectively.","tags":["Bug Detection","Program Analysis","Empirical study","Testing"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3715781","bibtex":"@article{DBLP:journals/pacmse/ChenZCWC25,\n author = {Xiao Chen and\n Hengcheng Zhu and\n Jialun Cao and\n Ming Wen and\n Shing{-}Chi Cheung},\n title = {SemBIC: Semantic-Aware Identification of Bug-Inducing Commits},\n journal = {Proc. {ACM} Softw. Eng.},\n volume = {2},\n number = {{FSE}},\n pages = {1363--1385},\n year = {2025},\n url = {https://doi.org/10.1145/3715781},\n doi = {10.1145/3715781},\n timestamp = {Mon, 23 Mar 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/journals/pacmse/ChenZCWC25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"An Empirical Study of Bugs in Data Visualization Libraries.","date":"2025","authors":["Weiqi Lu","Yongqiang Tian","Xiaohan Zhong","Haoyang Ma","Zhenyang Xu","Shing-Chi Cheung","Chengnian Sun"],"venue":"Proceedings of the ACM on Software Engineering","venueShort":"FSE","abstract":"Data visualization (DataViz) libraries play a crucial role in presentation, data analysis, and application development, underscoring the importance of their accuracy in transforming data into visual representations. Incorrect visualizations can adversely impact user experience, distort information conveyance, and influence user perception and decision-making processes. Visual bugs in these libraries can be particularly insidious as they may not cause obvious errors like crashes, but instead mislead users of the underlying data graphically, resulting in wrong decision making. 
Consequently, a good understanding of the unique characteristics of bugs in DataViz libraries is essential for researchers and developers to detect and fix bugs in DataViz libraries. This study presents the first comprehensive analysis of bugs in DataViz libraries, examining 564 bugs collected from five widely-used libraries. Our study systematically analyzes their symptoms and root causes, and provides a detailed taxonomy. We found that incorrect/inaccurate plots are pervasive in DataViz libraries and incorrect graphic computation is the major root cause, which necessitates further automated testing methods for DataViz libraries. Moreover, we identified eight key steps to trigger such bugs and two test oracles specific to DataViz libraries, which may inspire future research in designing effective automated testing techniques. Furthermore, with the recent advancements in Vision Language Models (VLMs), we explored the feasibility of applying these models to detect incorrect/inaccurate plots. The results show that the effectiveness of VLMs in bug detection varies from 29% to 57%, depending on the prompts, and adding more information in prompts does not necessarily increase the effectiveness. Our findings offer valuable insights into the nature and patterns of bugs in DataViz libraries, providing a foundation for developers and researchers to improve library reliability, and ultimately benefit more accurate and reliable data visualizations across various domains.","tags":["Data Visualization","Bug Detection","Empirical study","Vision Language Models"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3729363","bibtex":"@article{DBLP:journals/pacmse/LuTZMXCS25,\n author = {Weiqi Lu and\n Yongqiang Tian and\n Xiaohan Zhong and\n Haoyang Ma and\n Zhenyang Xu and\n Shing{-}Chi Cheung and\n Chengnian Sun},\n title = {An Empirical Study of Bugs in Data Visualization Libraries},\n journal = {Proc. {ACM} Softw. 
Eng.},\n volume = {2},\n number = {{FSE}},\n pages = {2075--2098},\n year = {2025},\n url = {https://doi.org/10.1145/3729363},\n doi = {10.1145/3729363},\n timestamp = {Sat, 06 Sep 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/pacmse/LuTZMXCS25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Question Selection for Multimodal Code Search Synthesis Using Probabilistic Version Spaces.","date":"2025","authors":["Jiarong Wu","Yanyan Jiang","Lili Wei","Congying Xu","Shing-Chi Cheung","Chang Xu"],"venue":"IEEE Transactions on Software Engineering","venueShort":"TSE","abstract":"Searching the occurrences of specific code patterns (code search) is a common task in software engineering, and programming by example (PBE) techniques have been applied to ease customizing code patterns. However, previous PBE tools only synthesize programs meeting the input-output examples, which may not always align with the user intent. To bridge this gap, this paper proposes Excalibur, a multi-modal (example and natural language description) and interactive synthesizer for code search. Excalibur ensures that the generated programs are correct for the provided examples (soundness) and include the user-intended program (bounded completeness). Furthermore, Excalibur helps the user identify the user-intended program through question-answer interaction. To minimize the required interaction efforts, question selection is crucial. To improve question selection for code search, we propose probabilistic version spaces (ProbVS), in which the user-intended program’s probability is high and others are low. ProbVS combines traditional version spaces for compactly representing extensive programs and large language models (on the user-provided natural language description) for adjusting programs’ probabilities to align with users’ intents. 
Extensive experiments on a benchmark of 44 tasks demonstrated the effectiveness of Excalibur and ProbVS and demystified how ProbVS affects probability distributions and how the configurable parameters affect ProbVS.","tags":["Programming by Example","Program Synthesis","Code Search","LLM"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/TSE.2025.3565387","bibtex":"@article{DBLP:journals/tse/WuJWXCX25,\n author = {Jiarong Wu and\n Yanyan Jiang and\n Lili Wei and\n Congying Xu and\n Shing{-}Chi Cheung and\n Chang Xu},\n title = {Question Selection for Multimodal Code Search Synthesis Using Probabilistic\n Version Spaces},\n journal = {{IEEE} Trans. Software Eng.},\n volume = {51},\n number = {6},\n pages = {1724--1744},\n year = {2025},\n url = {https://doi.org/10.1109/TSE.2025.3565387},\n doi = {10.1109/TSE.2025.3565387},\n timestamp = {Sun, 06 Jul 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/tse/WuJWXCX25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"DOMAINEVAL: An Auto-Constructed Benchmark for Multi-Domain Code Generation.","date":"2025","authors":["Qiming Zhu","Jialun Cao","Yaojie Lu","Hongyu Lin","Xianpei Han","Le Sun","Shing-Chi Cheung"],"venue":"Thirty-Ninth AAAI Conference on Artificial Intelligence","venueShort":"AAAI","abstract":"Code benchmarks such as HumanEval are widely adopted to evaluate the capabilities of Large Language Models (LLMs), providing insights into their strengths and weaknesses. However, current benchmarks primarily exercise LLMs' capability on common coding tasks (e.g., bubble sort, greatest common divisor), leaving domain-specific coding tasks (e.g., computation, system, cryptography) unexplored. To fill this gap, we propose a multi-domain code benchmark, DOMAINEVAL, designed to evaluate LLMs' coding capabilities thoroughly. 
Our pipeline works in a fully automated manner, enabling a push-button construction from code repositories into formatted subjects under study. Interesting findings are observed by evaluating 12 representative LLMs against DOMAINEVAL. We notice that LLMs are generally good at computation tasks while falling short on cryptography and system coding tasks. The performance gap can be as much as 68.94% (80.94% - 12.0%) in some LLMs. We also observe that generating more samples can increase the overall performance of LLMs, while the domain bias may even increase. The contributions of this study include a code generation benchmark dataset DOMAINEVAL, encompassing six popular domains, a fully automated pipeline for constructing code benchmarks, and an identification of the limitations of LLMs in code generation tasks based on their performance on DOMAINEVAL, providing directions for future research improvements.","tags":["LLM","Benchmark","Code Generation","Empirical study"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1609/aaai.v39i24.34811","bibtex":"@inproceedings{DBLP:conf/aaai/ZhuC0LH0C25,\n author = {Qiming Zhu and\n Jialun Cao and\n Yaojie Lu and\n Hongyu Lin and\n Xianpei Han and\n Le Sun and\n Shing{-}Chi Cheung},\n editor = {Toby Walsh and\n Julie Shah and\n Zico Kolter},\n title = {{DOMAINEVAL:} An Auto-Constructed Benchmark for Multi-Domain Code\n Generation},\n booktitle = {Thirty-Ninth {AAAI} Conference on Artificial Intelligence, Thirty-Seventh\n Conference on Innovative Applications of Artificial Intelligence,\n Fifteenth Symposium on Educational Advances in Artificial Intelligence,\n {AAAI} 2025, Philadelphia, PA, USA, February 25 - March 4, 2025},\n pages = {26148--26156},\n publisher = {{AAAI} Press},\n year = {2025},\n url = {https://doi.org/10.1609/aaai.v39i24.34811},\n doi = {10.1609/AAAI.V39I24.34811},\n timestamp = {Wed, 18 Mar 2026 17:07:12 +0100},\n biburl = {https://dblp.org/rec/conf/aaai/ZhuC0LH0C25.bib},\n bibsource = {dblp computer science 
bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"CRUXEVAL-X: A Benchmark for Multilingual Code Reasoning, Understanding and Execution.","date":"2025","authors":["Ruiyang Xu","Jialun Cao","Yaojie Lu","Ming Wen","Hongyu Lin","Xianpei Han","Ben He","Shing-Chi Cheung","Le Sun"],"venue":"the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","venueShort":"ACL","abstract":"Ruiyang Xu, Jialun Cao, Yaojie Lu, Ming Wen, Hongyu Lin, Xianpei Han, Ben He, Shing-Chi Cheung, Le Sun. Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2025.","tags":["LLM","Program Synthesis","Natural Language Processing","Empirical Study"],"arxivUrl":null,"paperUrl":"https://aclanthology.org/2025.acl-long.1158/","bibtex":"@inproceedings{DBLP:conf/acl/XuC00LHHC025,\n author = {Ruiyang Xu and\n Jialun Cao and\n Yaojie Lu and\n Ming Wen and\n Hongyu Lin and\n Xianpei Han and\n Ben He and\n Shing{-}Chi Cheung and\n Le Sun},\n editor = {Wanxiang Che and\n Joyce Nabende and\n Ekaterina Shutova and\n Mohammad Taher Pilehvar},\n title = {{CRUXEVAL-X:} {A} Benchmark for Multilingual Code Reasoning, Understanding\n and Execution},\n booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational\n Linguistics (Volume 1: Long Papers), {ACL} 2025, Vienna, Austria,\n July 27 - August 1, 2025},\n pages = {23762--23779},\n publisher = {Association for Computational Linguistics},\n year = {2025},\n url = {https://aclanthology.org/2025.acl-long.1158/},\n timestamp = {Sun, 02 Nov 2025 21:27:24 +0100},\n biburl = {https://dblp.org/rec/conf/acl/XuC00LHHC025.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"From Informal to Formal - Incorporating and Evaluating LLMs on Natural Language Requirements to Verifiable Formal Proofs.","date":"2025","authors":["Jialun 
Cao","Yaojie Lu","Meiziniu Li","Haoyang Ma","Haokun Li","Mengda He","Cheng Wen","Le Sun","Hongyu Zhang","Shengchao Qin","Shing-Chi Cheung","Cong Tian"],"venue":"the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","venueShort":"ACL","abstract":"Jialun Cao, Yaojie Lu, Meiziniu Li, Haoyang Ma, Haokun Li, Mengda He, Cheng Wen, Le Sun, Hongyu Zhang, Shengchao Qin, Shing-Chi Cheung, Cong Tian. Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2025.","tags":["LLM","Program Synthesis","Empirical study"],"arxivUrl":null,"paperUrl":"https://aclanthology.org/2025.acl-long.1310/","bibtex":"@inproceedings{DBLP:conf/acl/Cao0LMLH000QCT25,\n author = {Jialun Cao and\n Yaojie Lu and\n Meiziniu Li and\n Haoyang Ma and\n Haokun Li and\n Mengda He and\n Cheng Wen and\n Le Sun and\n Hongyu Zhang and\n Shengchao Qin and\n Shing{-}Chi Cheung and\n Cong Tian},\n editor = {Wanxiang Che and\n Joyce Nabende and\n Ekaterina Shutova and\n Mohammad Taher Pilehvar},\n title = {From Informal to Formal - Incorporating and Evaluating LLMs on Natural\n Language Requirements to Verifiable Formal Proofs},\n booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational\n Linguistics (Volume 1: Long Papers), {ACL} 2025, Vienna, Austria,\n July 27 - August 1, 2025},\n pages = {26984--27003},\n publisher = {Association for Computational Linguistics},\n year = {2025},\n url = {https://aclanthology.org/2025.acl-long.1310/},\n timestamp = {Tue, 24 Mar 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/acl/Cao0LMLH000QCT25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"A Tale of Two DL Cities: When Library Tests Meet Compiler.","date":"2025","authors":["Qingchao Shen","Yongqiang Tian","Haoyang Ma","Junjie Chen","Lili Huang","Ruifeng Fu","Shing-Chi Cheung","Zan 
Wang"],"venue":"47th IEEE/ACM International Conference on Software Engineering","venueShort":"ICSE","abstract":"Deep Learning (DL) compilers typically load a DL model and optimize it with intermediate representation. Existing DL compiler testing techniques mainly focus on model optimization stages, but rarely explore bug detection at the model loading stage. Effectively testing the model loading stage requires covering diverse usages of each DL operator from various DL libraries, which shares a common objective with DL library testing, indicating that the embedded knowledge in DL library tests is beneficial for testing the model loading stage of DL compilers. With this idea, we propose Opera to migrate the knowledge embedded in DL library tests to test the model loading stage. Opera constructs diverse tests from various tests for DL libraries (including the tests documented in DL libraries and those generated by recent fuzzers). In total, we considered three sources of tests in DL libraries for migration. In addition, it incorporates a diversity-based test prioritization strategy to migrate and execute those tests that are more likely to detect diverse bugs earlier. We then used eight frontends from three DL compilers (e.g., TVM, TensorRT, and OpenVINO) for evaluation. OPERA detected 170 previously unknown bugs in total, 90 of which have been confirmed/fixed by developers, demonstrating the effectiveness of such the migration-based idea. 
The test prioritization strategy in OPERA improves testing efficiency with migrated tests by $11.9 \\% \\sim 47.4 \\%$ on average compared to general test prioritization strategies.","tags":["DL Compiler","Testing","Metamorphic Testing","Program Analysis"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/ICSE55347.2025.00025","bibtex":"@inproceedings{DBLP:conf/icse/ShenTMCHFCW25,\n author = {Qingchao Shen and\n Yongqiang Tian and\n Haoyang Ma and\n Junjie Chen and\n Lili Huang and\n Ruifeng Fu and\n Shing{-}Chi Cheung and\n Zan Wang},\n title = {A Tale of Two {DL} Cities: When Library Tests Meet Compiler},\n booktitle = {47th {IEEE/ACM} International Conference on Software Engineering,\n {ICSE} 2025, Ottawa, ON, Canada, April 26 - May 6, 2025},\n pages = {2201--2212},\n publisher = {{IEEE}},\n year = {2025},\n url = {https://doi.org/10.1109/ICSE55347.2025.00025},\n doi = {10.1109/ICSE55347.2025.00025},\n timestamp = {Fri, 04 Jul 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/conf/icse/ShenTMCHFCW25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Differential Testing of Concurrent Classes.","date":"2025","authors":["Valerio Terragni","Shing-Chi Cheung"],"venue":"IEEE Conference on Software Testing","venueShort":"ICST","abstract":"Concurrent programs are pervasive, yet difficult to write. The inherent complexity of thread synchronization makes the evolution of concurrent programs prone to concurrency faults. Previous work on regression testing concurrent programs focused on reducing the cost of re-run the existing tests. However, existing tests may not be able to expose the regression faults in the modified program. In this paper, we present Condiff a differential testing technique that generates concurrent tests and oracles to expose behavioral differences between two versions of a given concurrent class. 
Since concurrent programs are non-deterministic, this involves exploring all possible non-deterministic thread interleavings of each generated test on both versions. However, we can afford to analyze only a few concurrent tests due to the high cost of exhaustive interleaving exploration. To address the challenge, Condiff leverages the information of code changes and trace analysis to analyze only those concurrent tests that are likely to expose behavioral differences (if they exist). We evaluated Condiff on a set of Java classes. Our results show that Condiff can effectively generate concurrent tests that expose behavioral differences.","tags":["Concurrency","Differential Testing","Regression Testing","Java"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/ICST62969.2025.10989027","bibtex":"@inproceedings{DBLP:conf/icst/TerragniC25,\n author = {Valerio Terragni and\n Shing{-}Chi Cheung},\n title = {Differential Testing of Concurrent Classes},\n booktitle = {{IEEE} Conference on Software Testing, Verification and Validation,\n {ICST} 2025, Napoli, Italy, March 31 - April 4, 2025},\n pages = {255--266},\n publisher = {{IEEE}},\n year = {2025},\n url = {https://doi.org/10.1109/ICST62969.2025.10989027},\n doi = {10.1109/ICST62969.2025.10989027},\n timestamp = {Fri, 30 May 2025 12:14:04 +0200},\n biburl = {https://dblp.org/rec/conf/icst/TerragniC25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"CodeCleaner: Mitigating Data Contamination for LLM Benchmarking.","date":"2025","authors":["Jialun Cao","Songqiang Chen","Wuqi Zhang","Hau Ching Lo","Yeting Li","Shing-Chi Cheung"],"venue":"the 16th International Conference on Internetware","venueShort":"Internetware","abstract":"Data contamination presents a critical barrier preventing widespread industrial adoption of advanced software engineering techniques that leverage large language models (LLMs). 
This phenomenon occurs when evaluation data inadvertently overlaps with the public code repositories used to train LLMs, severely undermining the credibility of performance evaluations. Code refactoring, which comprises code restructuring and variable renaming, has emerged as a promising measure to mitigate data contamination. However, the lack of automated code refactoring tools and scientifically validated refactoring techniques has hampered widespread industrial implementation. To bridge the gap, this paper presents the first systematic study to examine the efficacy of code refactoring operators at multiple scales (method-level, class-level, and cross-class level) and in different programming languages. We develop CodeCleaner, including 11 operators for Python in multiple scales and 4 for Java. We elaborate on the rationale for why these operators could work to resolve data contamination and use both data-wise (e.g., N-gram matching overlap ratio) and model-wise metrics (e.g., perplexity) to quantify the efficacy after operators are applied. A drop of 75% overlap ratio is found when applying all operators in CodeCleaner, demonstrating their effectiveness in addressing data contamination. Besides, we migrate four operators to Java, showing their generalizability to another language. We also observed an average of 19% decrease in LLMs’ performance after applying our operators. 
We make CodeCleaner online available at https://github.com/ArabelaTso/CodeCleaner-v1 to facilitate further studies on mitigating LLM data contamination.","tags":["LLM","Data Contamination","Code Refactoring","Empirical study"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3755881.3755901","bibtex":"@inproceedings{DBLP:conf/internetware/CaoCZLLC25,\n author = {Jialun Cao and\n Songqiang Chen and\n Wuqi Zhang and\n Hau Ching Lo and\n Yeting Li and\n Shing{-}Chi Cheung},\n editor = {Hong Mei and\n Jian Lv and\n Zhi Jin and\n Xuandong Li and\n Thomas Zimmermann and\n Ge Li and\n Lei Bu and\n Xin Xia},\n title = {CodeCleaner: Mitigating Data Contamination for {LLM} Benchmarking},\n booktitle = {Proceedings of the 16th International Conference on Internetware,\n Internetware 2025, Trondheim, Norway, June 20-22, 2025},\n pages = {71--83},\n publisher = {{ACM}},\n year = {2025},\n url = {https://doi.org/10.1145/3755881.3755901},\n doi = {10.1145/3755881.3755901},\n timestamp = {Thu, 05 Mar 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/internetware/CaoCZLLC25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Cross2OH: Enabling Seamless Porting of C/C++ Software Libraries to OpenHarmony.","date":"2025","authors":["Qian Zhang","Tsz-On Li","Ying Wang","Li Li","Shing-Chi Cheung"],"venue":"40th IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","abstract":"OpenHarmony is a new mobile operating system that offers a popular alternative to Android and iOS. To support its adoption, significant efforts have been devoted to porting C/C++ libraries from Linux to OpenHarmony. However, this porting process presents unique challenges due to the fundamental architectural differences in system libraries, runtime environments, and build systems between the two platforms. 
These discrepancies manifest as Cross-platform Incompatibility (CPI) issues during cross-compilation, which are particularly difficult to resolve for two key reasons. First, conventional cross-compilation toolchains provide only brief error messages that offer inadequate diagnostic information for CPI issues. Second, resolving these issues requires a deep understanding of cross-platform discrepancies, yet comprehensive documentation or systematic guidelines about such Linux-to-OpenHarmony differences remain largely unavailable.In this experience paper, to assist developers in addressing these challenges, we conducted an empirical study on 92 C/C++ libraries successfully ported to OpenHarmony. Through manual step-by-step reproduction of all CPI issues, our study reveals that discrepancies between Linux and OpenHarmony can be divided into three categories, and CPI issues can manifest through eight dimensions. Furthermore, we identified eight common adaptation strategies for resolving CPI issues. Based on these findings, we present Cross2OH, an automated technique for porting Linux-based software to OpenHarmony. Our approach combines: (1) an adaptation knowledge base (derived from RQ1 and RQ2 findings) and (2) a static analysis approach to detect and patch eight types of CPI issues. Evaluation using real developer patches shows Cross2OH achieves 0.94 recall and 0.91 precision in resolving CPI issues. Notably, Cross2OH enables successful cross-compilation for 40 critical libraries (including dependencies for popular Android apps such as WeChat, Microsoft Excel, Bilibili), with 29 of them passed official OpenHarmony review. 
The evaluation results demonstrate Cross2OH’s potential to streamline the porting process and foster the growth of the OpenHarmony software ecosystem.","tags":["Compatibility Issues","Empirical study","Program Analysis","Testing"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/ASE63991.2025.00146","bibtex":"@inproceedings{DBLP:conf/kbse/ZhangLWLC25,\n author = {Qian Zhang and\n Tsz{-}On Li and\n Ying Wang and\n Li Li and\n Shing{-}Chi Cheung},\n title = {Cross2OH: Enabling Seamless Porting of {C/C++} Software Libraries\n to OpenHarmony},\n booktitle = {40th {IEEE/ACM} International Conference on Automated Software Engineering,\n {ASE} 2025, Seoul, Korea, Republic of, November 16-20, 2025},\n pages = {1744--1755},\n publisher = {{IEEE}},\n year = {2025},\n url = {https://doi.org/10.1109/ASE63991.2025.00146},\n doi = {10.1109/ASE63991.2025.00146},\n timestamp = {Tue, 10 Feb 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/kbse/ZhangLWLC25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Demystifying Cross-Language C/C++ Binaries: A Robust Software Component Analysis Approach.","date":"2025","authors":["Meiqiu Xu","Ying Wang","Wei Tang","Xian Zhan","Shing-Chi Cheung","Hai Yu","Zhiliang Zhu"],"venue":"40th IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","abstract":"Binary Software Composition Analysis (BSCA) is a technique for identifying the versions of third-party libraries (TPLs) used in compiled binaries, thereby tracing the dependencies and vulnerabilities of software components without access to their source code. 
However, existing BSCA techniques struggle with cross-language invoked C/C++ binaries in polyglot projects due to two key challenges: (1) interference from heterogeneous Foreign Function Interface (FFI) bindings that obscure distinctive TPL features and generate false positives during matching processes, and (2) the inherent complexity of composite binaries (fused binaries), particularly prevalent in polyglot development where multiple TPLs are frequently compiled into single executable units, resulting in blurred boundaries between libraries and substantially compromising version identification precision.We propose DeeperBin, a BSCA technique that addresses these challenges through a high-quality, large-scale feature database with four key advantages: (1) high scalability that is capable of analyzing 74,647 C/C++ TPL versions, (2) efficient noise filtering to remove FFI bindings and common functions, (3) automated extraction of version string regexes for 31,855 TPL versions, and (4) generation of distinctive version features using the Minimum Description Length (MDL) principle. Evaluated on 418 cross-language binaries, DeeperBin achieves 81.2% precision and 84.6% recall for TPL detection, outperforming state-of-the-art (SOTA) techniques by 14.1% and 23.2%, respectively. For version identification, it achieves 70.3% precision, a 12.6% improvement over state-of-the-art techniques. Ablation studies confirm the usefulness of FFI filtering and MDL-based features, boosting precision and recall by 17.1% and 18.8%. 
DeeperBin also maintains competitive efficiency, processing binaries in 364.3 seconds while supporting the largest feature database.","tags":["Third-Party Libraries","Binary Software Composition Analysis","Program Analysis","Fault Detection"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/ASE63991.2025.00148","bibtex":"@inproceedings{DBLP:conf/kbse/XuWTZCYZ25,\n author = {Meiqiu Xu and\n Ying Wang and\n Wei Tang and\n Xian Zhan and\n Shing{-}Chi Cheung and\n Hai Yu and\n Zhiliang Zhu},\n title = {Demystifying Cross-Language {C/C++} Binaries: {A} Robust Software\n Component Analysis Approach},\n booktitle = {40th {IEEE/ACM} International Conference on Automated Software Engineering,\n {ASE} 2025, Seoul, Korea, Republic of, November 16-20, 2025},\n pages = {1768--1780},\n publisher = {{IEEE}},\n year = {2025},\n url = {https://doi.org/10.1109/ASE63991.2025.00148},\n doi = {10.1109/ASE63991.2025.00148},\n timestamp = {Mon, 09 Feb 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/kbse/XuWTZCYZ25.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"LspFuzz: Hunting Bugs in Language Servers.","date":"2025","authors":["Hengcheng Zhu","Songqiang Chen","Valerio Terragni","Lili Wei","Yepang Liu","Jiarong Wu","Shing-Chi Cheung"],"venue":"40th IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","abstract":"The Language Server Protocol (LSP) has revolutionized the integration of code intelligence in modern software development. There are approximately 300 LSP server implementations for various languages and 50 editors offering LSP integration. However, the reliability of LSP servers is a growing concern, as crashes can disable all code intelligence features and significantly impact productivity, while vulnerabilities can put developers at risk even when editing untrusted source code. 
Despite the widespread adoption of LSP, no existing techniques specifically target LSP server testing. To bridge this gap, we present LspFuzz, a grey-box hybrid fuzzer for systematic LSP server testing. Our key insight is that effective LSP server testing requires holistic mutation of source code and editor operations, as bugs often manifest from their combinations. To satisfy the sophisticated constraints of LSP and effectively explore the input space, we employ a two-stage mutation pipeline: syntax-aware mutations to source code, followed by context-aware dispatching of editor operations. We evaluated LspFuzz on four widely used LSP servers. LspFuzz demonstrated superior performance compared to baseline fuzzers, and uncovered previously unknown bugs in real-world LSP servers. Of the 51 bugs we reported, 42 have been confirmed, 26 have been fixed by developers, and two have been assigned CVE numbers. Our work advances the quality assurance of LSP servers, providing both a practical tool and foundational insights for future research in this domain.","tags":["Testing","Program Analysis","Security","Fault Detection"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/ASE63991.2025.00183","bibtex":"@inproceedings{DBLP:conf/kbse/ZhuCTWLWC25,\n author = {Hengcheng Zhu and\n Songqiang Chen and\n Valerio Terragni and\n Lili Wei and\n Yepang Liu and\n Jiarong Wu and\n Shing{-}Chi Cheung},\n title = {LspFuzz: Hunting Bugs in Language Servers},\n booktitle = {40th {IEEE/ACM} International Conference on Automated Software Engineering,\n {ASE} 2025, Seoul, Korea, Republic of, November 16-20, 2025},\n pages = {2209--2221},\n publisher = {{IEEE}},\n year = {2025},\n url = {https://doi.org/10.1109/ASE63991.2025.00183},\n doi = {10.1109/ASE63991.2025.00183},\n timestamp = {Sun, 08 Feb 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/kbse/ZhuCTWLWC25.bib},\n bibsource = {dblp computer science bibliography, 
https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Word Closure-Based Metamorphic Testing for Machine Translation.","date":"2024","authors":["Xiaoyuan Xie","Shuo Jin","Songqiang Chen","Shing-Chi Cheung"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","abstract":"With the wide application of machine translation, the testing of Machine Translation Systems (MTSs) has attracted much attention. Recent works apply Metamorphic Testing (MT) to address the oracle problem in MTS testing. Existing MT methods for MTS generally follow the workflow of input transformation and output relation comparison, which generates a follow-up input sentence by mutating the source input and compares the source and follow-up output translations to detect translation errors, respectively. These methods use various input transformations to generate the test case pairs and have successfully triggered numerous translation errors. However, they have limitations in performing fine-grained and rigorous output relation comparison and thus may report many false alarms and miss many true errors. In this article, we propose a word closure-based output comparison method to address the limitations of the existing MTS MT methods. We first propose word closure as a new comparison unit, where each closure includes a group of correlated input and output words in the test case pair. Word closures suggest the linkages between the appropriate fragment in the source output translation and its counterpart in the follow-up output for comparison. Next, we compare the semantics on the level of word closure to identify the translation errors. In this way, we perform a fine-grained and rigorous semantic comparison for the outputs and thus realize more effective violation identification. We evaluate our method with the test cases generated by five existing input transformations and the translation outputs from three popular MTSs. 
Results show that our method significantly outperforms the existing works in violation identification by improving the precision and recall and achieving an average increase of 29.9% in F1 score. It also helps to increase the F1 score of translation error localization by 35.9%.","tags":["Metamorphic Testing","Machine Translation","Testing","Semantic Comparison"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3675396","bibtex":"@article{DBLP:journals/tosem/XieJCC24,\n author = {Xiaoyuan Xie and\n Shuo Jin and\n Songqiang Chen and\n Shing{-}Chi Cheung},\n title = {Word Closure-Based Metamorphic Testing for Machine Translation},\n journal = {{ACM} Trans. Softw. Eng. Methodol.},\n volume = {33},\n number = {8},\n pages = {203:1--203:46},\n year = {2024},\n url = {https://doi.org/10.1145/3675396},\n doi = {10.1145/3675396},\n timestamp = {Sun, 02 Nov 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/journals/tosem/XieJCC24.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Shortening Overlong Method Names with Abbreviations.","date":"2024","authors":["Yanjie Jiang","Hui Liu","Shing-Chi Cheung","Lu Zhang"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","abstract":"Methods should be named to summarize their responsibilities meaningfully. When a method has a non-trivial responsibility, it may require a naming using multiple words. However, overlong method names are susceptible to typos and reduced readability (e.g., displaying a statement partially in standard screen width or splitting it into multiple lines). Programming naming conventions commonly adopt a maximal length (in characters) for identifiers. In practice, developers may not necessarily find a meaningful name that follows such naming conventions when coding a non-trivial method. 
This article presents the first automated technique (called NameCompressor ) to shorten overlong method names. Our inspiration is that many lengthy words/phrases in an overlong method name have known and unambiguous abbreviations. The use of these abbreviations for method names is common. To shorten an overlong method name, NameCompressor employs three compression techniques, i.e., context-aware compression, probability-based compression, and machine learning-based compression, to find appropriate abbreviations for the words/phrases in the method name. We evaluate NameCompressor on a dataset of 700 overlong method names. It correctly generates 613 short names identical to those specified by the developers of these methods.","tags":["Program Synthesis","Unit Test","Software Analytics","Empirical study"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3676959","bibtex":"@article{DBLP:journals/tosem/JiangLCZ24,\n author = {Yanjie Jiang and\n Hui Liu and\n Shing{-}Chi Cheung and\n Lu Zhang},\n title = {Shortening Overlong Method Names with Abbreviations},\n journal = {{ACM} Trans. Softw. Eng. 
Methodol.},\n volume = {33},\n number = {8},\n pages = {205:1--205:24},\n year = {2024},\n url = {https://doi.org/10.1145/3676959},\n doi = {10.1145/3676959},\n timestamp = {Sat, 25 Jan 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/journals/tosem/JiangLCZ24.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Enchanting Program Specification Synthesis by Large Language Models Using Static Analysis and Program Verification.","date":"2024","authors":["Cheng Wen","Jialun Cao","Jie Su","Zhiwu Xu","Shengchao Qin","Mengda He","Haokun Li","Shing-Chi Cheung","Cong Tian"],"venue":"Computer Aided Verification - 36th International Conference","venueShort":"CAV","abstract":"Formal verification provides a rigorous and systematic approach to ensure the correctness and reliability of software systems. Yet, constructing specifications for the full proof relies on domain expertise and non-trivial manpower. In view of such needs, an automated approach for specification synthesis is desired. While existing automated approaches are limited in their versatility, i.e., they either focus only on synthesizing loop invariants for numerical programs, or are tailored for specific types of programs or invariants. Programs involving multiple complicated data types (e.g., arrays, pointers) and code structures (e.g., nested loops, function calls) are often beyond their capabilities. To help bridge this gap, we present AutoSpec, an automated approach to synthesize specifications for automated program verification. It overcomes the shortcomings of existing work in specification versatility, synthesizing satisfiable and adequate specifications for full proof. It is driven by static analysis and program verification, and is empowered by large language models (LLMs). 
AutoSpec addresses the practical challenges in three ways: (1) driving AutoSpec by static analysis and program verification, LLMs serve as generators to generate candidate specifications, (2) programs are decomposed to direct the attention of LLMs, and (3) candidate specifications are validated in each round to avoid error accumulation during the interaction with LLMs. In this way, AutoSpec can incrementally and iteratively generate satisfiable and adequate specifications. The evaluation shows its effectiveness and usefulness, as it outperforms existing works by successfully verifying 79% of programs through automatic specification synthesis, a significant improvement of 1.592x. It can also be successfully applied to verify the programs in a real-world X509-parser project.","tags":["Formal Software Verification","Program Synthesis","LLM","Specification Synthesis"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1007/978-3-031-65630-9_16","bibtex":"@inproceedings{DBLP:conf/cav/WenCSXQHLCT24,\n author = {Cheng Wen and\n Jialun Cao and\n Jie Su and\n Zhiwu Xu and\n Shengchao Qin and\n Mengda He and\n Haokun Li and\n Shing{-}Chi Cheung and\n Cong Tian},\n editor = {Arie Gurfinkel and\n Vijay Ganesh},\n title = {Enchanting Program Specification Synthesis by Large Language Models\n Using Static Analysis and Program Verification},\n booktitle = {Computer Aided Verification - 36th International Conference, {CAV}\n 2024, Montreal, QC, Canada, July 24-27, 2024, Proceedings, Part {II}},\n series = {Lecture Notes in Computer Science},\n pages = {302--328},\n publisher = {Springer},\n year = {2024},\n url = {https://doi.org/10.1007/978-3-031-65630-9\\_16},\n doi = {10.1007/978-3-031-65630-9\\_16},\n timestamp = {Mon, 23 Mar 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/cav/WenCSXQHLCT24.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Towards Understanding the Bugs in Solidity 
Compiler.","date":"2024","authors":["Haoyang Ma","Wuqi Zhang","Qingchao Shen","Yongqiang Tian","Junjie Chen","Shing-Chi Cheung"],"venue":"the 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis","venueShort":"ISSTA","abstract":"Solidity compiler plays a key role in enabling the development of smart contract applications on Ethereum by governing the syntax of a domain-specific language called Solidity and performing compilation and optimization of Solidity code. The correctness of Solidity compiler is critical in fostering transparency, efficiency, and trust in industries reliant on smart contracts. However, like other software systems, Solidity compiler is prone to bugs, which may produce incorrect bytecodes on blockchain platforms, resulting in severe security concerns. As a domain-specific compiler for smart contracts, Solidity compiler differs from other compilers in many perspectives, posing unique challenges to detect its bugs. To understand the bugs in Solidity compiler and benefit future research, in this paper, we present the first systematic study on 533 Solidity compiler bugs. We carefully examined their characteristics (including symptoms, root causes, and distribution), and their triggering test cases. Our study leads to seven bug-revealing takeaways for Solidity compiler. Moreover, to study the limitations of Solidity compiler fuzzers and bring our findings into practical scenarios, we evaluate three Solidity compiler fuzzers on our constructed benchmark. The results show that these fuzzers are inefficient in detecting Solidity compiler bugs. 
The inefficiency arises from their failure to consider the interesting bug-inducing features, bug-related compilation flags, and test oracles.","tags":["Smart Contracts","Compiler testing","Empirical study","Fault Detection"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3650212.3680362","bibtex":"@inproceedings{DBLP:conf/issta/MaZS00C24,\n author = {Haoyang Ma and\n Wuqi Zhang and\n Qingchao Shen and\n Yongqiang Tian and\n Junjie Chen and\n Shing{-}Chi Cheung},\n editor = {Maria Christakis and\n Michael Pradel},\n title = {Towards Understanding the Bugs in Solidity Compiler},\n booktitle = {Proceedings of the 33rd {ACM} {SIGSOFT} International Symposium on\n Software Testing and Analysis, {ISSTA} 2024, Vienna, Austria, September\n 16-20, 2024},\n pages = {1312--1324},\n publisher = {{ACM}},\n year = {2024},\n url = {https://doi.org/10.1145/3650212.3680362},\n doi = {10.1145/3650212.3680362},\n timestamp = {Sun, 19 Jan 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/issta/MaZS00C24.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"JavaBench: A Benchmark of Object-Oriented Code Generation for Evaluating Large Language Models.","date":"2024","authors":["Jialun Cao","Zhiyong Chen","Jiarong Wu","Shing-Chi Cheung","Chang Xu"],"venue":"the 39th IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","abstract":"Code generation benchmarks such as HumanEval are widely adopted to evaluate LLMs' capabilities. However, after consolidating the latest 24 benchmarks, we noticed three significant imbalances. First, imbalanced programming language. 95.8% of benchmarks involve Python, while only 5 benchmarks involve Java, resulting in an insufficient understanding of LLMs' capability to generate Java code. Second, imbalanced code granularity. Function-/statement-level benchmarks account for over 83.3% of benchmarks. 
Only a mere handful extends to class-/project-levels, and all are limited to Python. Third, lacking advanced features. Existing benchmarks primarily assess basic coding skills (e.g., variables, operators, and control structures), while overlooking advanced Object-Oriented Programming (OOP) features (i.e., encapsulation, inheritance, and polymorphism). Considering the prevalence of these advanced features in real-world Java project development, constructing benchmarks to test LLMs on handling OOP features is necessary.","tags":["LLM","Benchmark","Java","Object-Oriented Programming"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3691620.3695470","bibtex":"@inproceedings{DBLP:conf/kbse/CaoCWC024,\n author = {Jialun Cao and\n Zhiyong Chen and\n Jiarong Wu and\n Shing{-}Chi Cheung and\n Chang Xu},\n editor = {Vladimir Filkov and\n Baishakhi Ray and\n Minghui Zhou},\n title = {JavaBench: {A} Benchmark of Object-Oriented Code Generation for Evaluating\n Large Language Models},\n booktitle = {Proceedings of the 39th {IEEE/ACM} International Conference on Automated\n Software Engineering, {ASE} 2024, Sacramento, CA, USA, October 27\n - November 1, 2024},\n pages = {870--882},\n publisher = {{ACM}},\n year = {2024},\n url = {https://doi.org/10.1145/3691620.3695470},\n doi = {10.1145/3691620.3695470},\n timestamp = {Mon, 03 Mar 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/kbse/CaoCWC024.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Neural-FEBI: Accurate function identification in Ethereum Virtual Machine bytecode.","date":"2023","authors":["Jiahao He","Shuangyin Li","Xinming Wang","Shing-Chi Cheung","Gansen Zhao","Jinji Yang"],"venue":"Journal of Systems and Software","venueShort":"JSS","abstract":"Millions of smart contracts have been deployed onto the Ethereum platform, posing potential attack subjects. 
Therefore, analyzing contract binaries is vital since their sources are unavailable, involving identification comprising function entry identification and detecting its boundaries. Such boundaries are critical to many smart contract applications, e.g. reverse engineering and profiling. Unfortunately, it is challenging to identify functions from these stripped contract binaries due to the lack of internal function call statements and the compiler-inducing instruction reshuffling. Recently, several existing works excessively relied on a set of handcrafted heuristic rules which impose several faults. To address this issue, we propose a novel neural network-based framework for EVM bytecode Function Entries and Boundaries Identification (neural-FEBI) that does not rely on a fixed set of handcrafted rules. Instead, it used a two-level bi-Long Short-Term Memory network and a Conditional Random Field network to locate the function entries. The suggested framework also devises a control flow traversal algorithm to determine the code segments reachable from the function entry as its boundary. Several experiments on 38,996 publicly available smart contracts collected as binary demonstrate that neural-FEBI confirms the lowest and highest F1-scores for the function entries identification task across different datasets of 88.3 to 99.7, respectively. Its performance on the function boundary identification task is also increased from 79.4% to 97.1% compared with state-of-the-art. We further demonstrate that the identified function information can be used to construct more accurate intra-procedural CFGs and call graphs. 
The experimental results confirm that the proposed framework significantly outperforms state-of-the-art, often based on handcrafted heuristic rules.","tags":["Smart Contracts","Program Analysis","Neural Networks","Blockchain"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1016/j.jss.2023.111627","bibtex":"@article{DBLP:journals/jss/HeLWCZY23,\n author = {Jiahao He and\n Shuangyin Li and\n Xinming Wang and\n Shing{-}Chi Cheung and\n Gansen Zhao and\n Jinji Yang},\n title = {Neural-FEBI: Accurate function identification in Ethereum Virtual\n Machine bytecode},\n journal = {J. Syst. Softw.},\n volume = {199},\n pages = {111627},\n year = {2023},\n url = {https://doi.org/10.1016/j.jss.2023.111627},\n doi = {10.1016/J.JSS.2023.111627},\n timestamp = {Sat, 13 May 2023 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/jss/HeLWCZY23.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"COMET: Coverage-guided Model Generation For Deep Learning Library Testing.","date":"2023","authors":["Meiziniu Li","Jialun Cao","Yongqiang Tian","Tsz On Li","Ming Wen","Shing-Chi Cheung"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","abstract":"Recent deep learning (DL) applications are mostly built on top of DL libraries. The quality assurance of these libraries is critical to the dependable deployment of DL applications. Techniques have been proposed to generate various DL models and apply them to test these libraries. However, their test effectiveness is constrained by the diversity of layer API calls in their generated DL models. Our study reveals that these techniques can cover at most 34.1% layer inputs, 25.9% layer parameter values, and 15.6% layer sequences. As a result, we find that many bugs arising from specific layer API calls (i.e., specific layer inputs, parameter values, or layer sequences) can be missed by existing techniques. 
Because of this limitation, we propose COMET to effectively generate DL models with diverse layer API calls for DL library testing. COMET: (1) designs a set of mutation operators and a coverage-based search algorithm to diversify layer inputs, layer parameter values, and layer sequences in DL models. (2) proposes a model synthesis method to boost the test efficiency without compromising the layer API call diversity. Our evaluation result shows that COMET outperforms baselines by covering twice as many layer inputs (69.7% vs. 34.1%), layer parameter values (50.2% vs. 25.9%), and layer sequences (39.0% vs. 15.6%) as those by the state-of-the-art. Moreover, COMET covers 3.4% more library branches than those by existing techniques. Finally, COMET detects 32 new bugs in the latest version of eight popular DL libraries, including TensorFlow and MXNet, with 21 of them confirmed by DL library developers and seven of those confirmed bugs have been fixed by developers.","tags":["Deep Learning compiler testing","Testing","DL Compiler","Empirical study"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3583566","bibtex":"@article{DBLP:journals/tosem/LiCTLWC23,\n author = {Meiziniu Li and\n Jialun Cao and\n Yongqiang Tian and\n Tsz On Li and\n Ming Wen and\n Shing{-}Chi Cheung},\n title = {{COMET:} Coverage-guided Model Generation For Deep Learning Library\n Testing},\n journal = {{ACM} Trans. Softw. Eng. 
Methodol.},\n volume = {32},\n number = {5},\n pages = {127:1--127:34},\n year = {2023},\n url = {https://doi.org/10.1145/3583566},\n doi = {10.1145/3583566},\n timestamp = {Thu, 31 Aug 2023 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/tosem/LiCTLWC23.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Runtime Permission Issues in Android Apps: Taxonomy, Practices, and Ways Forward.","date":"2021-01-01","authors":["Ying Wang","Yibo Wang","Sinan Wang","Yepang Liu","Chang Xu","Shing-Chi Cheung","Hai Yu","Zhiliang Zhu"],"venue":"IEEE Transactions on Software Engineering","venueShort":"TSE","abstract":"Android introduces a new permission model that allows apps to request permissions at runtime rather than at the installation time since 6.0 (Marshmallow, API level 23). While this runtime permission model provides users with greater flexibility in controlling an app's access to sensitive data and system features, it brings new challenges to app development. First, as users may grant or revoke permissions at any time while they are using an app, developers need to ensure that the app properly checks and requests required permissions before invoking any permission-protected APIs. Second, Android's permission mechanism keeps evolving and getting customized by device manufacturers. Developers are expected to comprehensively test their apps on different Android versions and device models to make sure permissions are properly requested in all situations. Unfortunately, these requirements are often impractical for developers. In practice, many Android apps suffer from various runtime permission issues (ARP issues). While existing studies have explored ARP issues, the understanding of such issues is still preliminary. 
To better characterize ARP issues, we performed an empirical study using 135 Stack Overflow posts that discuss ARP issues and 199 real ARP issues archived in popular open-source Android projects on GitHub. Via analyzing the data, we observed 11 types of ARP issues that commonly occur in Android apps. For each type of issues, we systematically studied: (1) how they can be manifested, (2) how pervasive and serious they are in real-world apps, and (3) how they can be fixed. We also analyzed the evolution trend of different types of issues from 2015 to 2020 to understand their impact on the Android ecosystem. Furthermore, we conducted a field survey and in-depth interviews among the practitioners from open-source community and industry, to gain insights from practitioners’ practices and learn their requirements of tools that can help combat ARP issues. Finally, to understand the strengths and weaknesses of the existing tools that can detect ARP issues, we built ARPBench , an open benchmark consisting of 94 real ARP issues, and evaluated the performance of three available tools. The experimental results indicate that the existing tools have very limited supports for detecting our observed issue types and report a large number of false alarms. We further analyzed the tools’ limitations and summarized the challenges of designing an effective ARP issue detection technique. We hope that our findings can shed light on future research and provide useful guidance to practitioners.","tags":["Android","Empirical study","Security","Testing"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/TSE.2022.3148258","bibtex":"@article{DBLP:journals/tse/WangWWLXCYZ23,\n author = {Ying Wang and\n Yibo Wang and\n Sinan Wang and\n Yepang Liu and\n Chang Xu and\n Shing{-}Chi Cheung and\n Hai Yu and\n Zhiliang Zhu},\n title = {Runtime Permission Issues in Android Apps: Taxonomy, Practices, and\n Ways Forward},\n journal = {{IEEE} Trans. 
Software Eng.},\n volume = {49},\n number = {1},\n pages = {185--210},\n year = {2023},\n url = {https://doi.org/10.1109/TSE.2022.3148258},\n doi = {10.1109/TSE.2022.3148258},\n timestamp = {Thu, 21 Nov 2024 00:00:00 +0100},\n biburl = {https://dblp.org/rec/journals/tse/WangWWLXCYZ23.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Plumber: Boosting the Propagation of Vulnerability Fixes in the npm Ecosystem.","date":"2023","authors":["Ying Wang","Peng Sun","Lin Pei","Yue Yu","Chang Xu","Shing-Chi Cheung","Hai Yu","Zhiliang Zhu"],"venue":"IEEE Transactions on Software Engineering","venueShort":"TSE","abstract":"Vulnerabilities are known reported security threats that affect a large amount of packages in the npm ecosystem. To mitigate these security threats, the open-source community strongly suggests vulnerable packages to timely publish vulnerability fixes and recommends affected packages to update their dependencies. However, there are still serious lags in the propagation of vulnerability fixes in the ecosystem. In our preliminary study on the latest versions of 356,283 active npm packages, we found that 20.0% of them can still introduce vulnerabilities via direct or transitive dependencies although the involved vulnerable packages have already published fix versions for over a year. Prior study by (Chinthanet et al. 2021) lays the groundwork for research on how to mitigate propagation lags of vulnerability fixes in an ecosystem. They conducted an empirical investigation to identify lags that might occur between the vulnerable package release and its fixing release. They found that factors such as the branch upon which a fix landed and the severity of the vulnerability had a small effect on its propagation trajectory throughout the ecosystem. 
To ensure quick adoption and propagation of a release that contains the fix, they gave several actionable advice to developers and researchers. However, it is still an open question how to design an effective technique to accelerate the propagation of vulnerability fixes. Motivated by this problem, in this paper, we conducted an empirical study to learn the scale of packages that block the propagation of vulnerability fixes in the ecosystem and investigate their evolution characteristics. Furthermore, we distilled the remediation strategies that have better effects on mitigating the fix propagation lags. Leveraging our empirical findings, we propose an ecosystem-level technique, Plumber , for deriving feasible remediation strategies to boost the propagation of vulnerability fixes. To precisely diagnose the causes of fix propagation blocking, Plumber models the vulnerability metadata, and npm dependency metadata and continuously monitors their evolution. By analyzing a full-picture of the ecosystem-level dependency graph and the corresponding fix propagation statuses, it derives remediation schemes for pivotal packages. In the schemes, Plumber provides customized remediation suggestions with vulnerability impact analysis to arouse package developers’ awareness. We applied Plumber to generating 268 remediation reports for the identified pivotal packages, to evaluate its remediation effectiveness based on developers’ feedback. Encouragingly, 47.4% our remediation reports received positive feedback from many well-known npm projects, such as Tensorflow/tfjs , Ethers.js , and GoogleChrome/workbox . Our reports have boosted the propagation of vulnerability fixes into 16,403 root packages through 92,469 dependency paths. 
On average, each remediated package version is receiving 72,678 downloads per week by the time of this work.","tags":["Security","Empirical study","Dependency Management","Vulnerability"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/TSE.2023.3243262","bibtex":"@article{DBLP:journals/tse/0038SP00C0023,\n author = {Ying Wang and\n Peng Sun and\n Lin Pei and\n Yue Yu and\n Chang Xu and\n Shing{-}Chi Cheung and\n Hai Yu and\n Zhiliang Zhu},\n title = {Plumber: Boosting the Propagation of Vulnerability Fixes in the npm\n Ecosystem},\n journal = {{IEEE} Trans. Software Eng.},\n volume = {49},\n number = {5},\n pages = {3155--3181},\n year = {2023},\n url = {https://doi.org/10.1109/TSE.2023.3243262},\n doi = {10.1109/TSE.2023.3243262},\n timestamp = {Mon, 28 Aug 2023 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/tse/0038SP00C0023.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"ConfFix: Repairing Configuration Compatibility Issues in Android Apps.","date":"2023","authors":["Huaxun Huang","Chi Xu","Ming Wen","Yepang Liu","Shing-Chi Cheung"],"venue":"the 32nd ACM SIGSOFT International Symposium on Software Testing and Analysis","venueShort":"ISSTA","abstract":"XML configuration files are widely-used to specify the user interfaces (UI) of Android apps. Configuration compatibility (CC) issues are induced owing to the inconsistent handling of such XML configuration files across different Android framework versions. CC issues can cause software crashes and inconsistent look-and-feels, severely impacting the user experience of Android apps. However, there is no universal solution to resolve CC issues and app developers need to handle CC issues case by case. Existing tools are designed based on predefined rules or visual features that are possibly manifested by CC issues. 
Unfortunately, they can fail or generate overfitting patches when the CC issues are beyond their capabilities. To fill the above research gaps, we first empirically studied the app developers' common strategies in patching real-world CC issues. Based on the findings, we propose ConfFix, an automatic approach to repair CC issues in Android apps. ConfFix is driven by the knowledge of how an XML element is handled inconsistently in different versions of the Android framework and generates patches to eliminate such inconsistencies. We evaluated ConfFix on a set of 77 reproducible CC issues in 13 open-source Android apps. The results show that ConfFix outperforms baselines in successfully repairing 64 CC issues with a high precision. Encouragingly, the patches for 38 CC issues have been confirmed and merged by app developers.","tags":["Android","XML Configurations","Compatibility Issues","Program Repair"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3597926.3598074","bibtex":"@inproceedings{DBLP:conf/issta/HuangXW0C23,\n author = {Huaxun Huang and\n Chi Xu and\n Ming Wen and\n Yepang Liu and\n Shing{-}Chi Cheung},\n editor = {Ren{\\'{e}} Just and\n Gordon Fraser},\n title = {ConfFix: Repairing Configuration Compatibility Issues in Android Apps},\n booktitle = {Proceedings of the 32nd {ACM} {SIGSOFT} International Symposium on\n Software Testing and Analysis, {ISSTA} 2023, Seattle, WA, USA, July\n 17-21, 2023},\n pages = {514--525},\n publisher = {{ACM}},\n year = {2023},\n url = {https://doi.org/10.1145/3597926.3598074},\n doi = {10.1145/3597926.3598074},\n timestamp = {Sun, 19 Jan 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/issta/HuangXW0C23.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"ωTest: WebView-Oriented Testing for Android Applications.","date":"2023","authors":["Jiajun Hu","Lili Wei","Yepang Liu","Shing-Chi Cheung"],"venue":"the 32nd ACM SIGSOFT 
International Symposium on Software Testing and Analysis","venueShort":"ISSTA","abstract":"WebView is a UI widget that helps integrate web applications into the native context of Android apps. It provides powerful mechanisms for bi-directional interactions between the native-end (Java) and the web-end (JavaScript) of an Android app. However, these interaction mechanisms are complicated and have induced various types of bugs. To mitigate the problem, various techniques have been proposed to detect WebView-induced bugs via dynamic analysis, which heavily relies on executing tests to explore WebView behaviors. Unfortunately, these techniques either require manual effort or adopt random test generation approaches, which are not able to effectively explore diverse WebView behaviors. In this paper, we study the problem of test generation for WebViews in Android apps. Effective test generation for WebViews requires identifying the essential program properties to be covered by the generated tests. To this end, we propose WebView-specific properties to characterize WebView behaviors, and devise a cross-language dynamic analysis method to identify these properties. We develop ωTest, a test generation technique that searches for event sequences covering the identified WebView-specific properties. An evaluation on 74 real-world open-/closed-source Android apps shows that ωTest can cover diverse WebView behaviors and detect WebView-induced bugs effectively. ωTest detected 36 previously-unknown bugs. 
From the 22 bugs that we have reported to the app developers, 13 bugs were confirmed, 9 of which were fixed.","tags":["Android","Testing","WebView","Program Analysis"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3597926.3598112","bibtex":"@inproceedings{DBLP:conf/issta/HuW0C23,\n author = {Jiajun Hu and\n Lili Wei and\n Yepang Liu and\n Shing{-}Chi Cheung},\n editor = {Ren{\\'{e}} Just and\n Gordon Fraser},\n title = {{\\(\\omega\\)}Test: WebView-Oriented Testing for Android Applications},\n booktitle = {Proceedings of the 32nd {ACM} {SIGSOFT} International Symposium on\n Software Testing and Analysis, {ISSTA} 2023, Seattle, WA, USA, July\n 17-21, 2023},\n pages = {992--1004},\n publisher = {{ACM}},\n year = {2023},\n url = {https://doi.org/10.1145/3597926.3598112},\n doi = {10.1145/3597926.3598112},\n timestamp = {Fri, 16 May 2025 13:09:31 +0200},\n biburl = {https://dblp.org/rec/conf/issta/HuW0C23.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Nuances are the Key: Unlocking ChatGPT to Find Failure-Inducing Tests with Differential Prompting.","date":"2023","authors":["Tsz On Li","Wenxi Zong","Yibo Wang","Haoye Tian","Ying Wang","Shing-Chi Cheung","Jeff Kramer"],"venue":"38th IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","abstract":"Automated detection of software failures is an important but challenging software engineering task. It involves finding in a vast search space the failure-inducing test cases that contain an input triggering the software fault and an oracle asserting the incorrect execution. We are motivated to study how far this outstanding challenge can be solved by recent advances in large language models (LLMs) such as ChatGPT. However, our study reveals that ChatGPT has a relatively low success rate (28.8%) in finding correct failure-inducing test cases for buggy programs. 
A possible conjecture is that finding failure-inducing test cases requires analyzing the subtle differences (nuances) between the tokens of a program's correct version and those for its buggy version. When these two versions have similar sets of tokens and attentions, ChatGPT is weak in distinguishing their differences. We find that ChatGPT can successfully generate failure-inducing test cases when it is guided to focus on the nuances. Our solution is inspired by an interesting observation that ChatGPT could infer the intended functionality of buggy code if it is similar to the correct version. Driven by the inspiration, we develop a novel technique, called Differential Prompting, to effectively find failure-inducing test cases with the help of the compilable code synthesized by the inferred intention. Prompts are constructed based on the nuances between the given version and the synthesized code. We evaluate Differential Prompting on Quixbugs (a popular benchmark of buggy programs) and recent programs published at Codeforces (a popular programming contest portal, which is also an official benchmark of ChatGPT). We compare Differential Prompting with two baselines constructed using conventional ChatGPT prompting and Pynguin (the state-of-the-art unit test generation tool for Python programs). Our evaluation results show that for programs of Quixbugs, Differential Prompting can achieve a success rate of 75.0% in finding failure-inducing test cases, outperforming the best baseline by 2.6X. 
For programs of Codeforces, Differential Prompting's success rate is 66.7%, outperforming the best baseline by 4.0X.","tags":["LLM","Testing","Bug Detection","Program Synthesis"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/ASE56229.2023.00089","bibtex":"@inproceedings{DBLP:conf/kbse/LiZWTWCK23,\n author = {Tsz On Li and\n Wenxi Zong and\n Yibo Wang and\n Haoye Tian and\n Ying Wang and\n Shing{-}Chi Cheung and\n Jeff Kramer},\n title = {Nuances are the Key: Unlocking ChatGPT to Find Failure-Inducing Tests\n with Differential Prompting},\n booktitle = {38th {IEEE/ACM} International Conference on Automated Software Engineering,\n {ASE} 2023, Luxembourg, September 11-15, 2023},\n pages = {14--26},\n publisher = {{IEEE}},\n year = {2023},\n url = {https://doi.org/10.1109/ASE56229.2023.00089},\n doi = {10.1109/ASE56229.2023.00089},\n timestamp = {Sun, 19 Jan 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/kbse/LiZWTWCK23.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Testing Coreference Resolution Systems without Labeled Test Sets.","date":"2023","authors":["Jialun Cao","Yaojie Lu","Ming Wen","Shing-Chi Cheung"],"venue":"the 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering","venueShort":"ESEC/FSE","abstract":"Coreference resolution (CR) is a task to resolve different expressions (e.g., named entities, pronouns) that refer to the same real-world entity/event. It is a core natural language processing (NLP) component that underlies and empowers major downstream NLP applications such as machine translation, chatbots, and question-answering. Despite its broad impact, the problem of testing CR systems has rarely been studied. A major difficulty is the shortage of a labeled dataset for testing. 
While it is possible to feed arbitrary sentences as test inputs to a CR system, a test oracle that captures their expected test outputs (coreference relations) is hard to define automatically. To address the challenge, we propose Crest, an automated testing methodology for CR systems. Crest uses constituency and dependency relations to construct pairs of test inputs subject to the same coreference. These relations can be leveraged to define the metamorphic relation for metamorphic testing. We compare Crest with five state-of-the-art test generation baselines on two popular CR systems, and apply them to generate tests from 1,000 sentences randomly sampled from CoNLL-2012, a popular dataset for coreference resolution. Experimental results show that Crest outperforms baselines significantly. The issues reported by Crest are all true positives (i.e., 100% precision), compared with 63% to 75% achieved by the baselines.","tags":["Metamorphic Testing","Coreference Resolution","NLP","Test Generation"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3611643.3616258","bibtex":"@inproceedings{DBLP:conf/sigsoft/CaoL0C23,\n author = {Jialun Cao and\n Yaojie Lu and\n Ming Wen and\n Shing{-}Chi Cheung},\n editor = {Satish Chandra and\n Kelly Blincoe and\n Paolo Tonella},\n title = {Testing Coreference Resolution Systems without Labeled Test Sets},\n booktitle = {Proceedings of the 31st {ACM} Joint European Software Engineering\n Conference and Symposium on the Foundations of Software Engineering,\n {ESEC/FSE} 2023, San Francisco, CA, USA, December 3-9, 2023},\n pages = {107--119},\n publisher = {{ACM}},\n year = {2023},\n url = {https://doi.org/10.1145/3611643.3616258},\n doi = {10.1145/3611643.3616258},\n timestamp = {Fri, 31 May 2024 01:00:00 +0200},\n biburl = {https://dblp.org/rec/conf/sigsoft/CaoL0C23.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Can Machine Learning 
Pipelines Be Better Configured?","date":"2023","authors":["Yibo Wang","Ying Wang","Tingwei Zhang","Yue Yu","Shing-Chi Cheung","Hai Yu","Zhiliang Zhu"],"venue":"the 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering","venueShort":"ESEC/FSE","abstract":"A Machine Learning (ML) pipeline configures the workflow of a learning task using the APIs provided by ML libraries. However, a pipeline’s performance can vary significantly across different configurations of ML library versions. Misconfigured pipelines can result in inferior performance, such as poor execution time and memory usage, numeric errors and even crashes. A pipeline is subject to misconfiguration if it exhibits significantly inconsistent performance upon changes in the versions of its configured libraries or the combination of these libraries. We refer to such performance inconsistency as a pipeline configuration (PLC) issue.","tags":["ML pipeline","Compatibility Issues","Performance inconsistency","Configuration testing"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3611643.3616352","bibtex":"@inproceedings{DBLP:conf/sigsoft/WangWZ0C0023,\n author = {Yibo Wang and\n Ying Wang and\n Tingwei Zhang and\n Yue Yu and\n Shing{-}Chi Cheung and\n Hai Yu and\n Zhiliang Zhu},\n editor = {Satish Chandra and\n Kelly Blincoe and\n Paolo Tonella},\n title = {Can Machine Learning Pipelines Be Better Configured?},\n booktitle = {Proceedings of the 31st {ACM} Joint European Software Engineering\n Conference and Symposium on the Foundations of Software Engineering,\n {ESEC/FSE} 2023, San Francisco, CA, USA, December 3-9, 2023},\n pages = {463--475},\n publisher = {{ACM}},\n year = {2023},\n url = {https://doi.org/10.1145/3611643.3616352},\n doi = {10.1145/3611643.3616352},\n timestamp = {Sun, 19 Jan 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/sigsoft/WangWZ0C0023.bib},\n bibsource = {dblp computer science bibliography, 
https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Understanding the Bug Characteristics and Fix Strategies of Federated Learning Systems.","date":"2023","authors":["Xiaohu Du","Xiao Chen","Jialun Cao","Ming Wen","Shing-Chi Cheung","Hai Jin"],"venue":"the 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering","venueShort":"ESEC/FSE","abstract":"Federated learning (FL) is an emerging machine learning paradigm that aims to address the problem of isolated data islands. To preserve privacy, FL allows machine learning models and deep neural networks to be trained from decentralized data kept privately at individual devices. FL has been increasingly adopted in missioncritical fields such as finance and healthcare. However, bugs in FL systems are inevitable and may result in catastrophic consequences such as financial loss, inappropriate medical decision, and violation of data privacy ordinance. While many recent studies were conducted to understand the bugs in machine learning systems, there is no existing study to characterize the bugs arising from the unique nature of FL systems. To fill the gap, we collected 395 real bugs from six popular FL frameworks (Tensorflow Federated, PySyft, FATE, Flower, PaddleFL, and Fedlearner) in GitHub and StackOverflow, and then manually analyzed their symptoms and impacts, prone stages, root causes, and fix strategies. 
Furthermore, we report a series of findings and actionable implications that can potentially facilitate the detection of FL bugs.","tags":["Federated Learning","Bug Detection","Empirical study","Deep Learning"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3611643.3616347","bibtex":"@inproceedings{DBLP:conf/sigsoft/DuCC0C023,\n author = {Xiaohu Du and\n Xiao Chen and\n Jialun Cao and\n Ming Wen and\n Shing{-}Chi Cheung and\n Hai Jin},\n editor = {Satish Chandra and\n Kelly Blincoe and\n Paolo Tonella},\n title = {Understanding the Bug Characteristics and Fix Strategies of Federated\n Learning Systems},\n booktitle = {Proceedings of the 31st {ACM} Joint European Software Engineering\n Conference and Symposium on the Foundations of Software Engineering,\n {ESEC/FSE} 2023, San Francisco, CA, USA, December 3-9, 2023},\n pages = {1358--1370},\n publisher = {{ACM}},\n year = {2023},\n url = {https://doi.org/10.1145/3611643.3616347},\n doi = {10.1145/3611643.3616347},\n timestamp = {Mon, 23 Mar 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/sigsoft/DuCC0C023.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Multi-Constraint Adversarial Networks for Unsupervised Image-to-Image Translation.","date":"2022","authors":["Divya Saxena","Tarun Kulshrestha","Jiannong Cao","Shing-Chi Cheung"],"venue":"IEEE Transactions on Image Processing","venueShort":"TIP","abstract":"Unsupervised image-to-image translation aims to learn the mapping from an input image in a source domain to an output image in a target domain without paired training dataset. Recently, remarkable progress has been made in translation due to the development of generative adversarial networks (GANs). 
However, existing methods suffer from the training instability as gradients passing from discriminator to generator become less informative when the source and target domains exhibit sufficiently large discrepancies in appearance or shape. To handle this challenging problem, in this paper, we propose a novel multi-constraint adversarial model (MCGAN) for image translation in which multiple adversarial constraints are applied at generator's multi-scale outputs by a single discriminator to pass gradients to all the scales simultaneously and assist generator training for capturing large discrepancies in appearance between two domains. We further notice that the solution to regularize generator is helpful in stabilizing adversarial training, but results may have unreasonable structure or blurriness due to less context information flow from discriminator to generator. Therefore, we adopt dense combinations of the dilated convolutions at discriminator for supporting more information flow to generator. With extensive experiments on three public datasets, cat-to-dog, horse-to-zebra, and apple-to-orange, our method significantly improves state-of-the-arts on all datasets.","tags":["Deep Learning","Neural Networks","Generative Adversarial Networks","Image-to-Image Translation"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/TIP.2022.3144886","bibtex":"@article{DBLP:journals/tip/SaxenaKCC22,\n author = {Divya Saxena and\n Tarun Kulshrestha and\n Jiannong Cao and\n Shing{-}Chi Cheung},\n title = {Multi-Constraint Adversarial Networks for Unsupervised Image-to-Image\n Translation},\n journal = {{IEEE} Trans. 
Image Process.},\n volume = {31},\n pages = {1601--1612},\n year = {2022},\n url = {https://doi.org/10.1109/TIP.2022.3144886},\n doi = {10.1109/TIP.2022.3144886},\n timestamp = {Wed, 23 Feb 2022 00:00:00 +0100},\n biburl = {https://dblp.org/rec/journals/tip/SaxenaKCC22.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"SemMT: A Semantic-Based Testing Approach for Machine Translation Systems.","date":"2020-01-01","authors":["Jialun Cao","Meiziniu Li","Yeting Li","Ming Wen","Shing-Chi Cheung","Haiming Chen"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","abstract":"Machine translation has wide applications in daily life. In mission-critical applications such as translating official documents, incorrect translation can have unpleasant or sometimes catastrophic consequences. This motivates recent research on the testing methodologies for machine translation systems. Existing methodologies mostly rely on metamorphic relations designed at the textual level (e.g., Levenshtein distance) or syntactic level (e.g., distance between grammar structures) to determine the correctness of translation results. However, these metamorphic relations do not consider whether the original and the translated sentences have the same meaning (i.e., semantic similarity). To address this problem, in this article we propose SemMT, an automatic testing approach for machine translation systems based on semantic similarity checking. SemMT applies round-trip translation and measures the semantic similarity between the original and the translated sentences. Our insight is that the semantics concerning logical relations and quantifiers in sentences can be captured by regular expressions (or deterministic finite automata) where efficient semantic equivalence/similarity checking algorithms can be applied. 
Leveraging the insight, we propose three semantic similarity metrics and implement them in SemMT. We compared SemMT with related state-of-the-art testing techniques, demonstrating the effectiveness of mistranslation detection. The experiment results show that SemMT outperforms existing metrics, achieving an increase of 34.2% and 15.4% on accuracy and F-score, respectively. We also study the possibility of further enhancing the performance by combining various metrics. Finally, we discuss a solution to locate the suspicious trip in round-trip translation, which provides hints for bug diagnosis.","tags":["Metamorphic Testing","Machine Translation","Semantic Similarity","Regular expression"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3490488","bibtex":"@article{DBLP:journals/tosem/CaoLLWCC22,\n author = {Jialun Cao and\n Meiziniu Li and\n Yeting Li and\n Ming Wen and\n Shing{-}Chi Cheung and\n Haiming Chen},\n title = {SemMT: {A} Semantic-Based Testing Approach for Machine Translation\n Systems},\n journal = {{ACM} Trans. Softw. Eng. Methodol.},\n volume = {31},\n number = {2},\n pages = {34e:1--34e:36},\n year = {2022},\n url = {https://doi.org/10.1145/3490488},\n doi = {10.1145/3490488},\n timestamp = {Thu, 22 May 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/journals/tosem/CaoLLWCC22.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"APER: Evolution-Aware Runtime Permission Misuse Detection for Android Apps.","date":"2022","authors":["Sinan Wang","Yibo Wang","Xian Zhan","Ying Wang","Yepang Liu","Xiapu Luo","Shing-Chi Cheung"],"venue":"44th IEEE/ACM 44th International Conference on Software Engineering","venueShort":"ICSE","abstract":"The Android platform introduces the runtime permission model in version 6.0. The new model greatly improves data privacy and user experience, but brings new challenges for app developers. 
First, it allows users to freely revoke granted permissions. Hence, developers cannot assume that the permissions granted to an app would keep being granted. Instead, they should make their apps carefully check the permission status before invoking dangerous APIs. Second, the permission specification keeps evolving, bringing new types of compatibility issues into the ecosystem. To understand the impact of the challenges, we conducted an empirical study on 13,352 popular Google Play apps. We found that 86.0% apps used dangerous APIs asynchronously after permission management and 61.2% apps used evolving dangerous APIs. If an app does not properly handle permission revocations or platform differences, unexpected runtime issues may happen and even cause app crashes. We call such Android Runtime Permission issues as ARP bugs. Unfortunately, existing runtime permission issue detection tools cannot effectively deal with the ARP bugs induced by asynchronous permission management and permission specification evolution. To fill the gap, we designed a static analyzer, Aper, that performs reaching definition and dominator analysis on Android apps to detect the two types of ARP bugs. To compare Aper with existing tools, we built a benchmark, ARPfix, from 60 real ARP bugs. Our experiment results show that Aper significantly outperforms two academic tools, ARPDroid and RevDroid, and an industrial tool, Lint, on ARPfix, with an average improvement of 46.3% on F1-score. In addition, Aper successfully found 34 ARP bugs in 214 open-source Android apps, most of which can result in abnormal app behaviors (such as app crashes) according to our manual validation. We reported these bugs to the app developers. 
So far, 17 bugs have been confirmed and seven have been fixed.","tags":["Android","Empirical study","Security","Program Analysis"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3510003.3510074","bibtex":"@inproceedings{DBLP:conf/icse/WangWZWLLC22,\n author = {Sinan Wang and\n Yibo Wang and\n Xian Zhan and\n Ying Wang and\n Yepang Liu and\n Xiapu Luo and\n Shing{-}Chi Cheung},\n title = {{APER:} Evolution-Aware Runtime Permission Misuse Detection for Android\n Apps},\n booktitle = {44th {IEEE/ACM} 44th International Conference on Software Engineering,\n {ICSE} 2022, Pittsburgh, PA, USA, May 25-27, 2022},\n pages = {125--137},\n publisher = {{ACM}},\n year = {2022},\n url = {https://doi.org/10.1145/3510003.3510074},\n doi = {10.1145/3510003.3510074},\n timestamp = {Sun, 19 Jan 2025 13:14:40 +0100},\n biburl = {https://dblp.org/rec/conf/icse/WangWZWLLC22.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"DeepFD: Automated Fault Diagnosis and Localization for Deep Learning Programs.","date":"2022","authors":["Jialun Cao","Meiziniu Li","Xiao Chen","Ming Wen","Yongqiang Tian","Bo Wu","Shing-Chi Cheung"],"venue":"44th IEEE/ACM 44th International Conference on Software Engineering","venueShort":"ICSE","abstract":"As Deep Learning (DL) systems are widely deployed for mission-critical applications, debugging such systems becomes essential. Most existing works identify and repair suspicious neurons on the trained Deep Neural Network (DNN), which, unfortunately, might be a detour. Specifically, several existing studies have reported that many unsatisfactory behaviors are actually originated from the faults residing in DL programs. Besides, locating faulty neurons is not actionable for developers, while locating the faulty statements in DL programs can provide developers with more useful information for debugging. 
Though a few recent studies were proposed to pinpoint the faulty statements in DL programs or the training settings (e.g. too large learning rate), they were mainly designed based on predefined rules, leading to many false alarms or false negatives, especially when the faults are beyond their capabilities.","tags":["Deep Learning","Fault Detection","Program Analysis","Neural Networks"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3510003.3510099","bibtex":"@inproceedings{DBLP:conf/icse/CaoLC0TWC22,\n author = {Jialun Cao and\n Meiziniu Li and\n Xiao Chen and\n Ming Wen and\n Yongqiang Tian and\n Bo Wu and\n Shing{-}Chi Cheung},\n title = {DeepFD: Automated Fault Diagnosis and Localization for Deep Learning\n Programs},\n booktitle = {44th {IEEE/ACM} 44th International Conference on Software Engineering,\n {ICSE} 2022, Pittsburgh, PA, USA, May 25-27, 2022},\n pages = {573--585},\n publisher = {{ACM}},\n year = {2022},\n url = {https://doi.org/10.1145/3510003.3510099},\n doi = {10.1145/3510003.3510099},\n timestamp = {Tue, 24 Mar 2026 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/icse/CaoLC0TWC22.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Nufix: Escape From NuGet Dependency Maze.","date":"2022","authors":["Zhenming Li","Ying Wang","Zeqi Lin","Shing-Chi Cheung","Jian-Guang Lou"],"venue":"44th IEEE/ACM 44th International Conference on Software Engineering","venueShort":"ICSE","abstract":"Developers usually suffer from dependency maze (DM) issues, i.e., package dependency constraints are violated when a project's platform or dependencies are changed. This problem is especially serious in .NET ecosystem due to its fragmented platforms (e.g., .NET Framework, .NET Core, and .NET Standard). 
Fixing DM issues is challenging due to the complexity of dependency constraints: multiple DM issues often occur in one project; solving one DM issue usually causes another DM issue cropping up; the exponential search space of possible dependency combinations is also a barrier.","tags":["Dependency Management","Compatibility Issues","Third-Party Libraries","Empirical study"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3510003.3510118","bibtex":"@inproceedings{DBLP:conf/icse/LiWLCL22,\n author = {Zhenming Li and\n Ying Wang and\n Zeqi Lin and\n Shing{-}Chi Cheung and\n Jian{-}Guang Lou},\n title = {Nufix: Escape From NuGet Dependency Maze},\n booktitle = {44th {IEEE/ACM} 44th International Conference on Software Engineering,\n {ICSE} 2022, Pittsburgh, PA, USA, May 25-27, 2022},\n pages = {1545--1557},\n publisher = {{ACM}},\n year = {2022},\n url = {https://doi.org/10.1145/3510003.3510118},\n doi = {10.1145/3510003.3510118},\n timestamp = {Tue, 02 Aug 2022 01:00:00 +0200},\n biburl = {https://dblp.org/rec/conf/icse/LiWLCL22.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Insight: Exploring Cross-Ecosystem Vulnerability Impacts.","date":"2022","authors":["Meiqiu Xu","Ying Wang","Shing-Chi Cheung","Hai Yu","Zhiliang Zhu"],"venue":"37th IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","abstract":"Vulnerabilities, referred to as CLV issues, are induced by cross-language invocations of vulnerable libraries. Such issues greatly increase the attack surface of Python/Java projects due to their pervasive use of C libraries. Existing Python/Java build tools in PyPI and Maven ecosystems fail to report the dependency on vulnerable libraries written in other languages such as C. CLV issues are easily missed by developers. In this paper, we conduct the first empirical study on the status quo of CLV issues in PyPI and Maven ecosystems. 
It is found that 82,951 projects in these ecosystems are directly or indirectly dependent on libraries compiled from the C project versions that are identified to be vulnerable in CVE reports. Our study arouses the awareness of CLV issues in popular ecosystems and presents related analysis results.","tags":["Vulnerability","Third-Party Libraries","Empirical study","Dependency Management"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1145/3551349.3556921","bibtex":"@inproceedings{DBLP:conf/kbse/XuWCY022,\n author = {Meiqiu Xu and\n Ying Wang and\n Shing{-}Chi Cheung and\n Hai Yu and\n Zhiliang Zhu},\n title = {Insight: Exploring Cross-Ecosystem Vulnerability Impacts},\n booktitle = {37th {IEEE/ACM} International Conference on Automated Software Engineering,\n {ASE} 2022, Rochester, MI, USA, October 10-14, 2022},\n pages = {58:1--58:13},\n publisher = {{ACM}},\n year = {2022},\n url = {https://doi.org/10.1145/3551349.3556921},\n doi = {10.1145/3551349.3556921},\n timestamp = {Sun, 19 Jan 2025 00:00:00 +0100},\n biburl = {https://dblp.org/rec/conf/kbse/XuWCY022.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"RegexScalpel: Regular Expression Denial of Service (ReDoS) Defense by Localize-and-Fix.","date":"2022","authors":["Yeting Li","Yecheng Sun","Zhiwu Xu","Jialun Cao","Yuekang Li","Rongchen Li","Haiming Chen","Shing-Chi Cheung","Yang Liu","Yang Xiao"],"venue":"31st USENIX Security Symposium","venueShort":"USENIX Security","abstract":null,"tags":[],"arxivUrl":null,"paperUrl":"https://www.usenix.org/conference/usenixsecurity22/presentation/li-yeting","bibtex":"@inproceedings{DBLP:conf/uss/LiS0CLLCC0X22,\n author = {Yeting Li and\n Yecheng Sun and\n Zhiwu Xu and\n Jialun Cao and\n Yuekang Li and\n Rongchen Li and\n Haiming Chen and\n Shing{-}Chi Cheung and\n Yang Liu and\n Yang Xiao},\n editor = {Kevin R. B. 
Butler and\n Kurt Thomas},\n title = {RegexScalpel: Regular Expression Denial of Service (ReDoS) Defense\n by Localize-and-Fix},\n booktitle = {31st {USENIX} Security Symposium, {USENIX} Security 2022, Boston,\n MA, USA, August 10-12, 2022},\n pages = {4183--4200},\n publisher = {{USENIX} Association},\n year = {2022},\n url = {https://www.usenix.org/conference/usenixsecurity22/presentation/li-yeting},\n timestamp = {Thu, 22 May 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/conf/uss/LiS0CLLCC0X22.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Characterizing Transaction-Reverting Statements in Ethereum Smart Contracts.","date":"2021","authors":["Lu Liu","Lili Wei","Wuqi Zhang","Ming Wen","Yepang Liu","Shing-Chi Cheung"],"venue":"36th IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","abstract":"Smart contracts are programs stored on blockchains to execute transactions. When input constraints or security properties are violated at runtime, the transaction being executed by a smart contract needs to be reverted to avoid undesirable consequences. On Ethereum, the most popular blockchain that supports smart contracts, developers can choose among three transaction-reverting statements (i.e., require, if…revert, and if…throw) to handle anomalous transactions. While these transaction-reverting statements are vital for preventing smart contracts from exhibiting abnormal behaviors or suffering malicious attacks, there is limited understanding of how they are used in practice. In this work, we perform the first empirical study to characterize transaction-reverting statements in Ethereum smart contracts. We measured the prevalence of these statements in 3,866 verified smart contracts from popular dapps and built a taxonomy of their purposes via manually analyzing 557 transaction-reverting statements. 
We also compared template contracts and their corresponding custom contracts to understand how developers customize the use of transaction-reverting statements. Finally, we analyzed the security impact of transaction-reverting statements by removing them from smart contracts and comparing the mutated contracts against the original ones. Our study led to important findings. For example, we found that transaction-reverting statements are commonly used to perform seven types of authority verifications or validity checks, and missing such statements may compromise the security of smart contracts. We also found that current smart contract security analyzers cannot effectively handle transaction-reverting statements when detecting security vulnerabilities. Our findings can shed light on further research in the broad area of smart contract quality assurance and provide practical guidance to smart contract developers on the appropriate use of transaction-reverting statements.","tags":["Smart Contracts","Empirical study","Security","Vulnerability"],"arxivUrl":null,"paperUrl":"https://doi.org/10.1109/ASE51524.2021.9678597","bibtex":"@inproceedings{DBLP:conf/kbse/LiuWZ00C21,\n author = {Lu Liu and\n Lili Wei and\n Wuqi Zhang and\n Ming Wen and\n Yepang Liu and\n Shing{-}Chi Cheung},\n title = {Characterizing Transaction-Reverting Statements in Ethereum Smart\n Contracts},\n booktitle = {36th {IEEE/ACM} International Conference on Automated Software Engineering,\n {ASE} 2021, Melbourne, Australia, November 15-19, 2021},\n pages = {630--641},\n publisher = {{IEEE}},\n year = {2021},\n url = {https://doi.org/10.1109/ASE51524.2021.9678597},\n doi = {10.1109/ASE51524.2021.9678597},\n timestamp = {Fri, 16 May 2025 01:00:00 +0200},\n biburl = {https://dblp.org/rec/conf/kbse/LiuWZ00C21.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","awards":[],"projectUrl":null,"slidesUrl":null},{"title":"Workflow Refactoring for Maximizing Concurrency and 
Block-Structuredness","date":"2021","authors":["Wei Song","Hans-Arno Jacobsen","Shing-Chi Cheung","Hongyu Liu","Xiaoxing Ma"],"venue":"IEEE Trans. Serv. Comput.","venueShort":"TSC","tags":["Workflow refactoring","activity dependence","concurrency maximization","block-structuredness","synchronization links"],"abstract":"\nIn the era of Internet and big data, contemporary workflows become increasingly large in scale and complex in structure, introducing greater challenges for workflow modeling. Workflows are not with maximized concurrency and block-structuredness in terms of control flow, though languages supporting block-structuredness (e.g., BPEL) are employed. Existing workflow refactoring approaches mostly focus on maximizing concurrency according to dependences between activities, but do not consider the block-structuredness of the refactored workflow. It is easier to comprehend and analyze a workflow that is block-structured and to transform it into BPEL-like processes. In this paper, we aim at maximizing both concurrency and block-structuredness. Nevertheless, not all workflows can be refactored with a block-structured representation, and it is intractable to make sure that the refactored workflows are as block-structured as possible. We first define a well-formed dependence pattern of activities. The control flow among the activities in this pattern can be represented in block-structured forms with maximized concurrency. Then, we propose a greedy heuristics-based graph reduction approach to recursively find such patterns. In this way, the resulting workflow is with maximized concurrency and its block-structuredness approximates optimality. 
We show the effectiveness and efficiency of our approach with real-world scientific workflows.\n ","projectUrl":null,"paperUrl":null,"slidesUrl":null,"bibtex":"@article{DBLP:journals/tsc/SongJCLM21,\n author = {Wei Song and\n Hans{-}Arno Jacobsen and\n Shing{-}Chi Cheung and\n Hongyu Liu and\n Xiaoxing Ma},\n title = {Workflow Refactoring for Maximizing Concurrency and Block-Structuredness},\n journal = {{IEEE} Trans. Serv. Comput.},\n volume = {14},\n number = {4},\n pages = {1224--1237},\n year = {2021},\n url = {https://doi.org/10.1109/TSC.2018.2867593},\n doi = {10.1109/TSC.2018.2867593},\n timestamp = {Thu, 12 Aug 2021 17:51:00 +0200},\n biburl = {https://dblp.org/rec/journals/tsc/SongJCLM21.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"awards":[]},{"title":"Sifter: A Service Isolation Strategy for Internet Applications","date":"2021","authors":["Chunyang Ye","Shing-Chi Cheung","Wing Kwong Chan"],"venue":"IEEE Trans. Serv. Comput.","venueShort":"TSC","tags":["Atomicity sphere","behavior constraint","exception handling","implicit interaction","isolation","web service"],"abstract":"\nService oriented architecture (SOA) provides a flexible platform to build collaborative Internet applications by composing existing self-contained and autonomous services. However, the implicit interactions among the concurrently provisioned services may introduce interference to Internet applications and cause them behave abnormally. It is thus desirable to isolate services to safeguard their application consistency. Existing approaches mostly address this problem by restricting concurrent execution of services to avoid all the implicit interactions. These approaches, however, compromise the performance and flexibility of Internet applications due to the long running nature of services. This paper presents Sifter, a new service isolation strategy for Internet applications. 
We devise in this strategy a novel static approach to analyze the potential implicit interactions among the services and their impacts on the consistency of the associated Internet applications. By locating only those afflicted implicit interactions that may violate the application consistency, a novel approach based on exception handling and behavior constraints is customized to involved services to eliminate their impacts. We show that this approach exempts the consistency property of Internet applications from being interfered at runtime. The experimental results show that our approach has a better performance than existing solutions.\n ","projectUrl":null,"paperUrl":null,"slidesUrl":null,"bibtex":"@article{DBLP:journals/tsc/YeCC21,\n author = {Chunyang Ye and\n Shing{-}Chi Cheung and\n Wing Kwong Chan},\n title = {Sifter: {A} Service Isolation Strategy for Internet Applications},\n journal = {{IEEE} Trans. Serv. Comput.},\n volume = {14},\n number = {5},\n pages = {1545--1557},\n year = {2021},\n url = {https://doi.org/10.1109/TSC.2018.2876254},\n doi = {10.1109/TSC.2018.2876254},\n timestamp = {Wed, 03 Nov 2021 08:27:31 +0100},\n biburl = {https://dblp.org/rec/journals/tsc/YeCC21.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"awards":[]},{"title":"ContractGuard: Defend Ethereum Smart Contracts with Embedded Intrusion Detection","date":"2020","authors":["Xinming Wang","Jiahao He","Zhijian Xie","Gansen Zhao","Shing-Chi Cheung"],"venue":"IEEE Trans. Serv. Comput.","venueShort":"TSC","tags":["Blockchain","Smart Contracts","Program Analysis","Security"],"abstract":"\nEthereum smart contracts are programs that can be collectively executed by a network of mutually untrusted nodes. Smart contracts handle and transfer assets of values, offering strong incentives for malicious attacks. Intrusion attacks are a popular type of malicious attacks. 
In this article, we propose ContractGuard, the first intrusion detection system (IDS) to defend Ethereum smart contracts against such attacks. Like IDSs for conventional programs, ContractGuard detects intrusion attempts as abnormal control flow. However, existing IDS techniques/tools are inapplicable to Ethereum smart contracts due to Ethereum's decentralized nature and its highly restrictive execution environment. To address these issues, we design ContractGuard by embedding it in the contracts to profile context-tagged acyclic paths, and optimizing it under the Ethereum gas-oriented performance model. The main goal is to minimize the overheads, to which the users will be extremely sensitive since the cost needs to be paid upfront in digital concurrency. Empirical investigation using real-life contracts deployed in the Ethereum mainnet shows that on average, ContractGuard only adds to 36.14 percent of the deployment overhead and 28.27 percent of the runtime overhead. Furthermore, we conducted controlled experiments and show that ContractGuard successfully guard against attacks on all real-world vulnerabilities and 83 percent of the seeded vulnerabilities.\n ","projectUrl":null,"paperUrl":null,"slidesUrl":null,"bibtex":"@article{DBLP:journals/tsc/WangHXZC20,\n author = {Xinming Wang and\n Jiahao He and\n Zhijian Xie and\n Gansen Zhao and\n Shing{-}Chi Cheung},\n title = {ContractGuard: Defend Ethereum Smart Contracts with Embedded Intrusion\n Detection},\n journal = {{IEEE} Trans. Serv. 
Comput.},\n volume = {13},\n number = {2},\n pages = {314--328},\n year = {2020},\n url = {https://doi.org/10.1109/TSC.2019.2949561},\n doi = {10.1109/TSC.2019.2949561},\n timestamp = {Fri, 22 May 2020 21:56:08 +0200},\n biburl = {https://dblp.org/rec/journals/tsc/WangHXZC20.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"awards":[]},{"title":"Detecting numerical bugs in neural network architectures","date":"2020","authors":["Yuhao Zhang","Luyao Ren","Liqian Chen","Yingfei Xiong","Shing-Chi Cheung","Tao Xie"],"venue":"Proceedings of the 28th ACM Joint European SoftwareEngineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE ’20)","venueShort":"ESEC/FSE","tags":["Neural Networks","Formal Software Verification","Program Analysis"],"awards":["Distinguished Paper"],"abstract":"\nDetecting bugs in deep learning software at the architecture level provides additional benefits that detecting bugs at the model level does not provide. This paper makes the first attempt to conduct static analysis for detecting numerical bugs at the architecture level. We propose a static analysis approach for detecting numerical bugs in neural architectures based on abstract interpretation. Our approach mainly comprises two kinds of abstraction techniques, i.e., one for tensors and one for numerical values. Moreover, to scale up while maintaining adequate detection precision, we propose two abstraction techniques: tensor partitioning and (elementwise) affine relation analysis to abstract tensors and numerical values, respectively. We realize the combination scheme of tensor partitioning and affine relation analysis (together with interval analysis) as DEBAR, and evaluate it on two datasets: neural architectures with known bugs (collected from existing studies) and real-world neural architectures. 
The evaluation results show that DEBAR outperforms other tensor and numerical abstraction techniques on accuracy without losing scalability. DEBAR successfully detects all known numerical bugs with no false positives within 1.7–2.3 seconds per architecture. On the real-world architectures, DEBAR reports 529 warnings within 2.6–135.4 seconds per architecture, where 299 warnings are true positives.\n ","projectUrl":null,"paperUrl":null,"slidesUrl":null,"bibtex":"@inproceedings{DBLP:conf/sigsoft/ZhangRC0C020,\n author = {Yuhao Zhang and\n Luyao Ren and\n Liqian Chen and\n Yingfei Xiong and\n Shing{-}Chi Cheung and\n Tao Xie},\n editor = {Prem Devanbu and\n Myra B. Cohen and\n Thomas Zimmermann},\n title = {Detecting numerical bugs in neural network architectures},\n booktitle = {{ESEC/FSE} '20: 28th {ACM} Joint European Software Engineering Conference\n and Symposium on the Foundations of Software Engineering, Virtual\n Event, USA, November 8-13, 2020},\n pages = {826--837},\n publisher = {{ACM}},\n year = {2020},\n url = {https://doi.org/10.1145/3368089.3409720},\n doi = {10.1145/3368089.3409720},\n timestamp = {Tue, 10 Nov 2020 10:58:23 +0100},\n biburl = {https://dblp.org/rec/conf/sigsoft/ZhangRC0C020.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null},{"title":"Boosting automated program repair with bug-inducing commits","date":"2020","authors":["Ming Wen","Yepang Liu","Shing-Chi Cheung"],"venue":"42nd International Conference on Software Engineering (NIER Track)","venueShort":"ICSE NIER","tags":["Program Repair"],"abstract":"\nThe search space explosion problem is a long-standing challenge for search-based automated program repair (APR). The operation space, which defines how to select appropriate mutation operators, and the ingredient space, which defines how to select appropriate code elements as fixing ingredients, are two major factors that determine the search space. 
Conventional approaches mainly devise fixing strategies via learning from frequent fixing patterns based on substantial patches collected from open-source projects. In this paper, we propose a new direction for search-based APR, that is to repair a bug via learning from how the bug was introduced instead of learning from how other bugs are frequently fixed. Our empirical study reveals that substantial mutation operators and fixing ingredients required to fix a bug can be inferred from the commit that introduced the bug. Based on the findings of our empirical study, we devised a preliminary fixing strategy based on bug-inducing commits, which is able to repair 8 new bugs that cannot be repaired by the state-of-the-art techniques. Such results demonstrate that our proposed new idea for searched-based APR is promising.\n ","projectUrl":null,"paperUrl":null,"slidesUrl":null,"bibtex":"@inproceedings{DBLP:conf/icse/Wen0C20,\n author = {Ming Wen and\n Yepang Liu and\n Shing{-}Chi Cheung},\n editor = {Gregg Rothermel and\n Doo{-}Hwan Bae},\n title = {Boosting automated program repair with bug-inducing commits},\n booktitle = {{ICSE-NIER} 2020: 42nd International Conference on Software Engineering,\n New Ideas and Emerging Results, Seoul, South Korea, 27 June - 19 July,\n 2020},\n pages = {77--80},\n publisher = {{ACM}},\n year = {2020},\n url = {https://doi.org/10.1145/3377816.3381743},\n doi = {10.1145/3377816.3381743},\n timestamp = {Mon, 03 May 2021 16:42:27 +0200},\n biburl = {https://dblp.org/rec/conf/icse/Wen0C20.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"awards":[]},{"title":"MR-Scout: Automated Synthesis of Metamorphic Relations from Existing Test Cases","date":"2024-06-29","authors":["Congying Xu","Valerio Terragni","Hengcheng Zhu","Jiarong Wu","Shing-Chi Cheung"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","tags":["Metamorphic 
Testing"],"projectUrl":"https://mr-scout.github.io/","paperUrl":"https://dl.acm.org/doi/abs/10.1145/3656340","arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"MR-Adopt: Automatic Deduction of Input Transformation Function for Metamorphic Testing","date":"2024-09-01","authors":["Congying Xu","Songqiang Chen","Jiarong Wu","Shing-Chi Cheung","Valerio Terragni","Hengcheng Zhu","Jialun Cao"],"venue":"IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","tags":["LLM","Meramorphic Testing"],"projectUrl":"https://mr-adopt.github.io/","paperUrl":"https://arxiv.org/abs/2408.15815","arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"Automatic build repair for test cases using incompatible Java versions","date":"2024","authors":["Ching Hang Mak","Shing-Chi Cheung"],"venue":"Information and Software Technology","venueShort":"INFSOF","tags":["Java","Program Analysis","Program Repair","Third-Party Libraries"],"abstract":"\n Context:\n Bug bisection is a common technique used to identify a revision that introduces a bug or indirectly fixes a bug, and often involves executing multiple revisions of a project to determine whether the bug is present within the revision. However, many legacy revisions often cannot be successfully compiled due to changes in the programming language or tools used in the compilation process, adding complexity and preventing automation in the bisection process.\n \n Objective:\n In this paper, we introduce an approach to repair test cases of Java projects by performing dependency minimization. Our approach aims to remove classes and methods that are not required for the execution of one or more test cases. 
Unlike existing state-of-the-art techniques, our approach performs minimization at source-level, which allows compile-time errors to be fixed.\n \n Method:\n A standalone Java tool implementing our technique was developed, and we evaluated our technique using subjects from Defects4J retargeted against Java 8 and 17.\n \n Results:\n Our evaluation showed that a majority of subjects can be repaired solely by performing minimization, including replicating the test results of the original version. Furthermore, our technique is also shown to achieve accurate minimized results, while only adding a small overhead to the bisection process.\n \n Conclusion:\n Our proposed technique is shown to be effective for repairing build failures with minimal overhead, making it suitable for use in automated bug bisection. Our tool can also be adapted for use cases such as bug corpus creation and refactoring. \n ","projectUrl":"https://github.com/Derppening/test-dependency-minimization/","arxivUrl":"https://arxiv.org/abs/2404.17818","bibtex":"@article{mak2024automatic,\n title={Automatic build repair for test cases using incompatible java versions},\n author={Mak, Ching Hang and Cheung, Shing-Chi},\n journal={Information and Software Technology},\n pages={107473},\n year={2024},\n publisher={Elsevier}\n }","paperUrl":null,"slidesUrl":null,"awards":[]},{"title":"CINA: Suppressing the Detection of Unstable Context Inconsistency","date":"2015","authors":["Chang Xu","Wang Xi","Shing-Chi Cheung","Xiaoxing Ma","Chun Cao","Jian Lu"],"venue":"IEEE Transactions on Software Engineering 41(9), September 2015","venueShort":"TSE","tags":[],"abstract":"\n Context-aware applications adapt their behavior based on contexts. Contexts can, however, be incorrect. A popular means to build dependable applications is to augment them with a set of constraints to govern the consistency of context values. 
These constraints are evaluated upon context changes to detect inconsistencies so that they can be timely handled. However, we observe that many context inconsistencies are unstable. They vanish by themselves and do not require handling. Such inconsistencies are detected due to misaligned sensor sampling or improper inconsistency detection scheduling. We call them unstable context inconsistencies (or STINs). STINs should be avoided to prevent unnecessary inconsistency handling and unstable behavioral adaptation to applications. In this article, we study STINs systematically, from examples to theoretical analysis, and present algorithms to suppress their detection. Our key insight is that only certain patterns of context changes can make a consistency constraint subject to the detection of STINs. We derive such patterns and proactively use them to suppress the detection of STINs. We implemented our idea and applied it to real-world applications. Experimental results confirmed its effectiveness in suppressing the detection of numerous STINs with negligible overhead, while preserving the detection of stable context inconsistencies that require inconsistency handling.\n ","paperUrl":"https://www.computer.org/csdl/trans/ts/2015/09/07078871-abs.html","bibtex":"@article{DBLP:journals/tse/XuXCMCL15,\n author = {Chang Xu and\n Wang Xi and\n Shing{-}Chi Cheung and\n Xiaoxing Ma and\n Chun Cao and\n Jian Lu},\n title = {Cina: Suppressing the Detection of Unstable Context Inconsistency},\n journal = {{IEEE} Trans. 
Software Eng.},\n volume = {41},\n number = {9},\n pages = {842--865},\n year = {2015},\n url = {http://dx.doi.org/10.1109/TSE.2015.2418760},\n doi = {10.1109/TSE.2015.2418760},\n timestamp = {Thu, 10 Dec 2015 11:33:07 +0100},\n biburl = {http://dblp.uni-trier.de/rec/bib/journals/tse/XuXCMCL15},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Sifter: A Service Isolation Strategy for Internet Applications","date":"2019","authors":["Chunyang Ye","Shing-Chi Cheung","W.K. Chan"],"venue":"IEEE Transactions on Services Computing 2019","venueShort":"TSC","tags":[],"abstract":"\n Service oriented architecture (SOA) provides a flexible platform to build collaborative Internet applications by composing existing self-contained and autonomous services. However, the implicit interactions among the concurrently provisioned services may introduce interference to Internet applications and cause them behave abnormally. It is thus desirable to isolate services to safeguard their application consistency. Existing approaches mostly address this problem by restricting concurrent execution of services to avoid all the implicit interactions. These approaches, however, compromise the performance and flexibility of Internet applications due to the long running nature of services. This paper presents Sifter, a new service isolation strategy for Internet applications. We devise in this strategy a novel static approach to analyze the potential implicit interactions among the services and their impacts on the consistency of the associated Internet applications. By locating only those afflicted implicit interactions that may violate the application consistency, a novel approach based on exception handling and behavior constraints is customized to involved services to eliminate their impacts. 
We show that this approach exempts the consistency property of Internet applications from being interfered at runtime. The experimental results show that our approach has a better performance than existing solutions.\n ","paperUrl":"materials/TSC-cyye.pdf","bibtex":"@ARTICLE{8493286,\nauthor={C. Ye and Shing-Chi Cheung and W. K. Chan},\njournal={IEEE Transactions on Services Computing},\ntitle={Sifter: A Service Isolation Strategy for Internet Applications},\nyear={2019},\nvolume={},\nnumber={},\npages={1-1},\nmonth={},}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Testing Multithreaded Programs via Thread Speed Control","date":"2018","authors":["Dongjie Chen","Yanyan Jiang","Chang Xu","Xiaoxing Ma","Jian Lu"],"venue":"26th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE 2018), Lake Buena Vista, Florida, USA, Nov 2018","venueShort":"ESEC/FSE","tags":[],"abstract":"\n Intensive dependencies of a Java project on third-party libraries can easily lead to the presence of multiple library or class versions on its classpath. When this happens, JVM will load one version and shadows the others. Dependency conflict (DC) issues occur when the loaded version fails to cover a required feature (e.g., method) referenced by the project, thus causing runtime exceptions. However, the warnings of duplicate classes or libraries detected by existing build tools such as Maven can be benign since not all instances of duplication will induce runtime exceptions, and hence are often ignored by developers. In this paper, we conducted an empirical study on real-world DC issues collected from large open source projects. We studied the manifestation and fixing patterns of DC issues. Based on our findings, we designed Decca, an automated detection tool that assesses DC issues' severity and filters out the benign ones. 
Our evaluation results on 30 projects show that Decca achieves a precision of 0.923 and recall of 0.766 in detecting high-severity DC issues. Decca also detected new DC issues in these projects. Subsequently, 20 DC bug reports were filed, and 11 of them were confirmed by developers. Issues in 6 reports were fixed with our suggested patches.\n ","paperUrl":"https://cs.nju.edu.cn/changxu/1_publications/ESECFSE18.pdf","projectUrl":"https://midwinter1993.github.io/Schnauzer/","bibtex":"@inproceedings{chen_testing_2018,\n author = {Dongjie Chen and Yanyan Jiang and Chang Xu and Xiaoxing Ma and Jian Lu},\n title = {Testing multithreaded programs via thread speed control},\n pages = {to appear},\n year = {2018},\n booktitle = {Proceedings of the 26th Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE)},\n pdf = {/spar/publication/chen_testing_2018.pdf},\n code = {https://midwinter1993.github.io/Schnauzer/},\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Understanding and Detecting Callback Compatibility Issues for Android Applications","date":"2018","authors":["Huaxun Huang","Lili Wei","Yepang Liu","Shing-Chi Cheung"],"venue":"2018 33rd ACM/IEEE International Conference on Automated Software Engineering (ASE '18), September 2018, Montpellier, France","venueShort":"ASE","tags":[],"abstract":"\n The control flows of Android apps are largely driven by the protocols that govern how callback APIs are invoked in response to various event.\nWhen these callback APIs evolve along with the Android framework, the changes in their invocation protocols can induce unexpected control flows to existing Android apps, causing various compatibility issues. 
We refer to these issues as callback compatibility issues.\nWhile Android framework updates have received due attention, little is known about their impacts on app control flows and the callback compatibility issues thus induced.\nTo bridge the gap, we examined Android documentations\nand conducted an empirical study on 100 real-world callback compatibility issues\nto investigate how these issues were induced by callback API evolutions. \nBased on our empirical findings, we propose a graph-based model to capture the control flow inconsistencies caused by API evolutions and devise a static analysis technique, CIDER, to detect callback compatibility issues.\nOur evaluation of CIDER on 20 popular open-source Android apps shows that CIDER is effective. It detected 13 new callback compatibility issues in these apps, among which 12 issues were confirmed and 9 issues were fixed.\n ","paperUrl":"materials/callback.pdf","projectUrl":"https://cideranalyzer.github.io/","bibtex":"@inproceedings {ASE18,\n title = {{Understanding and Detecting Callback Compatibility Issues for Android Applications}},\n author = {Huaxun Huang, Lili Wei, Yepang Liu, Shing-Chi Cheung},\n booktitle = {Proceedings of the 2018 33rd ACM/IEEE International Conference on Automated Software Engineering, {ASE} 2018},\n year = {2018},\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"A Tale of Two Cities: How WebView Induces Bugs to Android Applications","date":"2018","authors":["Jiajun Hu","Lili Wei","Yepang Liu","Shing-Chi Cheung","Huaxun Huang"],"venue":"Proceedings of 2018 33rd ACM/IEEE International Conference on Automated Software Engineering (ASE'18), September 3-7, 2018, Montpellier, France","venueShort":"ASE","tags":[],"abstract":"\n WebView is a widely used Android component that augments a native app with web browser capabilities. It eases the interactions between an app’s native code and web code. However, the interaction mechanism of WebView induces new types of bugs in Android apps. 
Understanding the characteristics and manifestation of these WebView-induced bugs (ωBugs for short) facilitates the correct usages of WebViews in Android apps. This motivates us to conduct the first empirical study on ωBugs based on those found in popular open-source Android apps. Our study identified the major root causes and consequences of ωBugs and made interesting observations that can be leveraged for detecting and diagnosing ωBugs. Based on the empirical study, we further propose an automated testing technique ωDroid to effectively expose ωBugs in Android apps. In our experiments, ωDroid successfully discovered 30 unique and previously-unknown ωBugs when applied to 146 open-source Android apps. We reported the 30 ωBugs to the corresponding app developers. Out of these 30 ωBugs, 14 were confirmed and 7 of them were fixed. This shows that ωDroid can effectively detect ωBugs that are of the developers’ concern.\n ","paperUrl":"materials/wDroid.pdf","projectUrl":"http://home.cse.ust.hk/~jhuao/wDroid.html","bibtex":"@inproceedings {ASE18Hu,\n title = {{A Tale of Two Cities: How WebView Induces Bugs to Android Applications}},\n author = {Jiajun Hu, Lili Wei, Yepang Liu, Shing-Chi Cheung, Huaxun Huang},\n booktitle = {{Proceedings of the 2018 33rd ACM/IEEE International Conference on Automated Software Engineering (ASE’18)}},\n year = {2018},\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Synthesizing Relation-Aware Entity Transformation by Examples","date":"2018","authors":["Jiarong Wu","Yanyan Jiang","Chang Xu","Shing-Chi Cheung","Xiaoxing Ma","Jian Lu"],"venue":"40th International Conference on Software Engineering (ICSE 2018 Poster)","venueShort":"ICSE Poster","tags":[],"abstract":"\n Recently, programming by examples (PBE) technique achieves a great success in processing and transforming data entities, yet existing approaches generally fall short on the tasks concerning entity relations. 
This paper presents ENTER, a domain-agnostic language for relation-aware entity transformation synthesis. It leverages the combination of two basic relations, the equivalence relation and the total order relation, to succinctly express complex entity relations. ENTER can be instantiated with domain-specific elements to solve a wide range of entity transformation tasks.\n ","paperUrl":"https://cs.nju.edu.cn/changxu/1_publications/ICSE18.pdf","bibtex":"@inproceedings{wu_synthesizing_2018,\n author = {Jiarong Wu and Yanyan Jiang and Chang Xu and S. C. Cheung and Xiaoxing Ma and Jian Lu},\n title = {Synthesizing relation-aware entity transformation by examples},\n booktitle = {Proceedings of the 40th International Conference on Software Engineering (ICSE Poster Track)},\n pages = {to appear},\n year = {2018},\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"AATT+: Effectively Manifesting Concurrency Bugs in Android Apps","date":"2018","authors":["Jue Wang","Yanyan Jiang","Chang Xu","Qiwei Li","Tianxiao Gu","Jun Ma","Xiaoxing Ma","Jian Lu"],"venue":"Science of Computer Programming (SCP)","venueShort":"SCP","tags":[],"abstract":"\n Smartphones are indispensable in people’s daily activities, and smartphone apps tend to be increasingly concurrent due to the wide use of multi-core devices and technologies. Due to this tendency, developers are increasingly unable to tackle the complexity of concurrent apps and to avoid subtle concurrency bugs. To better address this issue, we propose a novel approach to detecting concurrency bugs in Android apps based on the fact that one can generate simultaneous input events and their schedules for an app, which would easily trigger concurrency bugs in an app. We conduct systematic state space exploration to find potentially conflicting resource accesses in an Android app. The app is then automatically pressure-tested by guided event and schedule generation. 
We implemented our prototype tool named AATT+ and evaluated it with two sets of real-world Android apps. Benchmarking using 15 Android apps with previously known concurrency bugs, AATT+ and existing concurrency-unaware techniques detected 10 and 1 bugs, respectively. Evaluated with another set of 17 popu- lar Android apps, AATT+ detected 11 concurrency bugs and 7 of them were previously unknown, achieving an over 80% higher detection rate than existing concurrency-unaware techniques.\n ","paperUrl":"https://cs.nju.edu.cn/changxu/1_publications/SCP18.pdf","projectUrl":"https://github.com/skull591/AATT","bibtex":"@inproceedings{wang_aatt_2018,\n author = {Jue Wang and Yanyan Jiang and Chang Xu and Qiwei Li and Tianxiao Gu and Jun Ma and Xiaoxing Ma and Jian Lu},\n title = {AATT+: Effectively manifesting concurrency bugs in Android apps},\n journal = {Science of Computer Programming (SCP)},\n year = {2018},\n volume = {163},\n pages = {1--18},\n url = {https://doi.org/10.1016/j.scico.2018.03.008},\n code = {https://github.com/skull591/AATT},\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Hybrid CPU-GPU Constraint Checking: Towards Efficient Context Consistency","date":"2016","authors":["Jun Sui","Chang Xu","Shing-Chi Cheung","Wang Xi","Yanyan Jiang","Chun Cao","Xiaoxing Ma","Jian Lu"],"venue":"Information and Software Technology (IST) 2016","venueShort":"IST","tags":[],"abstract":"\n Context: modern software increasingly relies on contexts about computing environments to provide adaptive and smart services. Such contexts, captured and derived from environments of uncontrollable noises, can be inaccurate, incomplete or even in conflict with each other. This is known as the context inconsistency problem, and should be addressed by checking contexts in time to prevent abnormal behavior to applications. 
One popular way is to check application contexts against consistency constraints before their uses, but this can bring heavy computation due to tremendous amount of contexts in changing environments. Existing efforts improve the checking performance by incremental or concurrent computation, but they rely on CPU computing only and can consume valuable CPU capabilities that should otherwise be used by applications themselves.\n\nObjective: in this article, we propose GAIN, a GPU-supported technique to checking consistency constraints systematically and efficiently.\n\nMethod: GAIN can automatically recognize a constraint’s parallel units and associate these units and their runtime instances with matched contexts under checking. GAIN coordinates CPU and GPU and utilizes their capabilities for task preparation and context checking, respectively.\n\nResult: we evaluate GAIN experimentally with millions of real-life context data. The evaluation results show that GAIN can work at least 2–7 × faster and requires much less CPU usage than CPU-based techniques. 
Besides, GAIN can also work stably for different and varying workloads.\n\nConclusion: our experience with GAIN suggests its high efficiency in constraint checking for context consistency as well as its wide applicability to different application workloads.\n ","paperUrl":"http://www.sciencedirect.com/science/article/pii/S095058491500169X","bibtex":"@article{Sui_IST2016,\n author = {Jun Sui and\n Chang Xu and\n Shing{-}Chi Cheung and\n Wang Xi and\n Yanyan Jiang and\n Chun Cao and\n Xiaoxing Ma and\n Jian Lu},\n title = {Hybrid {CPU-GPU} constraint checking: Towards efficient context consistency},\n journal = {Information {&} Software Technology},\n volume = {74},\n pages = {230--242},\n year = {2016},\n url = {http://dx.doi.org/10.1016/j.infsof.2015.10.003},\n doi = {10.1016/j.infsof.2015.10.003},\n timestamp = {Mon, 25 Apr 2016 19:47:34 +0200},\n biburl = {http://dblp.uni-trier.de/rec/bib/journals/infsof/SuiXCX0CML16},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Taming Android Fragmentation: Characterizing and Detecting Compatibility Issues for Android Apps","date":"2016","authors":["Lili Wei","Yepang Liu","Shing-Chi Cheung"],"venue":"31st IEEE/ACM International Conference on Automated Software Engineering (ASE 2016), Singapore, Sept 2016","venueShort":"ASE","tags":["Android","Empirical Study","Fault Detection"],"awards":["Distinguished Paper"],"abstract":"\n Android ecosystem is heavily fragmented. The numerous combinations of different device models and operating system versions make it impossible for Android app developers to exhaustively test their apps. As a result, various compatibility issues arise, causing poor user experience. However, little is known on the characteristics of such fragmentation-induced compatibility issues and no mature tools exist to help developers quickly diagnose and fix these issues. 
To bridge the gap, we conducted an empirical study on 191 real-world compatibility issues collected from popular open-source Android apps. Our study characterized the symptoms and root causes of compatibility issues, and disclosed that the patches of these issues exhibit common patterns. With these findings, we propose a technique named FicFinder to automatically detect compatibility issues in Android apps. FicFinder performs static code analysis based on a model that captures Android APIs as well as their associated context by which compatibility issues are triggered. FicFinder reports actionable debugging information to developers when it detects potential issues. We evaluated FicFinder with 27 large-scale open-source Android apps. The results show that FicFinder can precisely detect compatibility issues in these apps and uncover previously-unknown issues.\n ","paperUrl":"http://sccpu2.cse.ust.hk/ficfinder/ASE_FicFinder.pdf","projectUrl":"http://sccpu2.cse.ust.hk/ficfinder/index.html","bibtex":"@inproceedings{Wei_ASE16,\n\tauthor = {Lili Wei and Yepang Liu and\n\t\t \t Shing{-}Chi Cheung},\n\ttitle = {Taming Android Fragmentation: Characterizing and Detecting Compatibility Issues for Android Apps},\n\tbooktitle = {Proceedings of the 31st IEEE/ACM International Conference on Automated Software Engineering, {ASE} 2016},\n\tyear = {2016}\n}","arxivUrl":null,"slidesUrl":null},{"title":"OASIS: Prioritizing Static Analysis Warnings for Android Apps Based on App User Reviews","date":"2017","authors":["Lili Wei","Yepang Liu","Shing-Chi Cheung"],"venue":"11th joint meeting of the European Software Engineering Conference and the ACM SIGSOFT Symposium on the Foundations of Software Engineering (ESEC/FSE 2017), Paderborn, Germany, Sept 2017","venueShort":"ESEC/FSE","tags":[],"abstract":"\n Lint is a widely-used static analyzer for detecting bugs/issues in Android apps. However, it can generate many false warnings. 
One existing solution to this problem is to leverage project history data (e.g., bug fixing statistics) for warning prioritization. Unfortunately, such techniques are biased toward a project’s archived warnings and can easily miss newissues. Anotherweakness is that developers cannot readily relate the warnings to the impacts perceivable by users. To overcome these weaknesses, in this paper, we propose a semantics-aware approach, OASIS, to prioritizing Lint warnings by leveraging app user reviews. OASIS combines program analysis and NLP techniques to recover the intrinsic links between the Lint warnings for a given app and the user complaints on the app problems caused by the issues of concern. OASIS leverages the strength of such links to prioritize warnings. We evaluated OASIS on six popular and large-scale open-source Android apps. The results show that OASIS can effectively prioritize Lint warnings and help identify new issues that are previously-unknown to app developers.\n ","paperUrl":"materials/OASIS_author_copy.pdf","bibtex":"@inproceedings{Wei_FSE17,\n\tauthor = {Lili Wei and Yepang Liu and\n\t\t \t Shing{-}Chi Cheung},\n\ttitle = {OASIS: Prioritizing Static Analysis Warnings for Android Apps Based on App User Reviews},\n\tbooktitle = {joint meeting of the European Software Engineering Conference and the ACM SIGSOFT Symposium on the Foundations of Software Engineering, {ESEC/FSE} 2017},\n\tyear = {2017}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Understanding and Detecting Fragmentation-Induced Compatibility Issues for Android Apps","date":"2020","authors":["Lili Wei","Yepang Liu","Shing-Chi Cheung","Huaxun Huang","Xuan Lu","Xuanzhe Liu"],"venue":"IEEE Transactions on Software Engineering 2020","venueShort":"TSE","tags":["Android","Bug Detection"],"abstract":"\n Android ecosystem is heavily fragmented. 
The numerous combinations of different device models and operating system versions make it impossible for Android app developers to exhaustively test their apps, and thus various compatibility issues arise. Unfortunately, little is known on the characteristics of such fragmentation-induced compatibility issues. No mature tools exist to help developers quickly diagnose and fix these issues. To bridge the gap, we conducted an empirical study on 220 real-world compatibility issues collected from five popular open-source Android apps. We further interviewed Android practitioners and conducted an online survey to gain insights from real practices. Via the studies, we characterized compatibility issues, investigated common practices to handle compatibility issues, and disclosed that these issues exhibit common patterns. With these findings, we propose a technique, FicFinder, to automatically detect compatibility issues in Android apps. FicFinder performs static code analysis based on a model that captures Android APIs as well as their associated context by which compatibility issues can be triggered. FicFinder reports actionable debugging information to developers when it detects potential issues. We evaluated FicFinder with 53 large-scale open-source Android apps. The results show that FicFinder can precisely detect compatibility issues in these apps and uncover previously-unknown issues.\n ","paperUrl":"materials/TSE19-lili.pdf","bibtex":"@ARTICLE{DBLP:journals/tse/WeiLCHLL20,\n author = {Lili Wei and\n Yepang Liu and\n Shing{-}Chi Cheung and\n Huaxun Huang and\n Xuan Lu and\n Xuanzhe Liu},\n title = {Understanding and Detecting Fragmentation-Induced Compatibility Issues\n for Android Apps},\n journal = {{IEEE} Trans. 
Software Eng.},\n volume = {46},\n number = {11},\n pages = {1176--1199},\n year = {2020},\n url = {https://doi.org/10.1109/TSE.2018.2876439},\n doi = {10.1109/TSE.2018.2876439},\n timestamp = {Thu, 31 Dec 2020 01:35:38 +0100},\n biburl = {https://dblp.org/rec/journals/tse/WeiLCHLL20.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"PIVOT: Learning API-Device Correlations to Facilitate Android Compatibility Issue Detection","date":"2019","authors":["Lili Wei","Yepang Liu","Shing-Chi Cheung"],"venue":"International Conference on Software Engineering 2019, Technical Research Paper, Montréal, QC, Canada, 25 May - 31 May","venueShort":"ICSE","tags":[],"awards":["Distinguished Artifact"],"abstract":"\n The heavily fragmented Android ecosystem has induced various compatibility issues in Android apps. The search space for such fragmentation-induced compatibility issues (FIC issues) is huge, comprising three dimensions: device models, Android OS versions, and Android APIs. FIC issues, especially those arising from device models, evolve quickly with the frequent release of new device models to the market. As a result, an automated technique is desired to maintain timely knowledge of such FIC issues, which are mostly undocumented. In this paper, we propose such a technique, PIVOT, that automatically learns API-device correlations of FIC issues from existing Android apps. PIVOT extracts and prioritizes API-device correlations from a given corpus of Android apps. We evaluated PIVOT with popular Android apps on Google Play. Evaluation results show that PIVOT can effectively prioritize valid API-device correlations for app corpora collected at different time. 
Leveraging the knowledge in the learned API-device correlations, we further conducted a case study and successfully uncovered ten previously-undetected FIC issues in open-source Android apps.\n ","paperUrl":"materials/ICSE19-lili.pdf","projectUrl":"https://ficissuepivot.github.io/Pivot/","bibtex":"@inproceedings {ICSE19Wei,\n title = {{PIVOT: Learning API-Device Correlations to Facilitate Android Compatibility Issue Detection}},\n author = {Lili Wei and Yepang Liu and Shing-Chi Cheung},\n booktitle = {{Proceedings of the 41th International Conference on Software Engineering}, {ICSE 2019}},\n year = {2019},\n pages = {11}\n}","arxivUrl":null,"slidesUrl":null},{"title":"Which Generated Test Failures Are Fault Revealing? Prioritizing Failures Based on Inferred Precondition Violations using PAF","date":"2018","authors":["Mijung Kim","Shing-Chi Cheung","Sunghun Kim"],"venue":"The ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering, Technical Research Paper, Lake Buena Vista, Florida, 4 Nov - 9 Nov 2018","venueShort":"ESEC/FSE","tags":[],"abstract":"\n Automated unit testing tools, such as Randoop, have been developed to produce failing tests as means of finding faults. However, these tools often produce false alarms, so are not widely used in practice. The main reason for a false alarm is that the generated failing test violates an implicit precondition of the method under test, such as a field should not be null at the entry of the method. This condition is not explicitly programmed or documented but implicitly assumed\n\t\t\t\tby developers. To address this limitation, we propose a technique called Paf to cluster generated test failures due to the same cause and reorder them based on their likelihood of violating an implicit precondition of the method under test. From various test executions, Paf observes their dataflows to the variables whose values are used when the program fails. 
Based on the dataflow similarity and where these values are originated, Paf clusters failures and determines\n\t\t\t\ttheir likelihood of being fault revealing. We integrated Paf into Randoop. Our empirical results on open-source projects show that Paf effectively clusters fault revealing tests arising from the same\n\t\t\t\tfault and successfully prioritizes the fault-revealing ones.\n ","paperUrl":"materials/fse18-mijung.pdf","bibtex":"@inproceedings{kim2018paf,\n title={Which Generated Test Failures Are Fault Revealing? Prioritizing\nFailures Based on Inferred Precondition Violations using PAF},\n author={Kim, Mijung and Cheung, Shing-Chi and Kim, Sunghun},\n booktitle={Proceedings of the 2018 26th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE 2018)},\n pages={1--12},\n year={2018},\n organization={ACM}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Locus: Locating Bugs from Software Changes","date":"2016","authors":["Ming Wen","Rongxin Wu","Shing-Chi Cheung"],"venue":"31st IEEE/ACM International Conference on Automated Software Engineering (ASE 2016), Singapore, Sept 2016","venueShort":"ASE","tags":[],"abstract":"\n Various information retrieval (IR) based techniques have been proposed recently to locate bugs automatically at the file level. However, their usefulness is often compromised by the coarse granularity of files and the lack of contextual information. To address this, we propose to locate bugs using software changes, which offer finer granularity than files and provide important contextual clues for bug-fixing. We observe that bug inducing changes can facilitate the bug fixing process. For example, it helps triage the bug fixing task to the developers who committed the bug inducing changes or enables developers to fix bugs by reverting these changes. 
Our study further identifies that change logs and the naturally small granularity of changes can help boost the performance of IR-based bug localization. Motivated by these observations, we propose an IR-based approach Locus to locate bugs from software changes, and evaluate it on six large open source projects. The results show that Locus outperforms existing techniques at the source file level localization significantly. MAP and MRR in particular have been improved, on average, by 20.1% and 20.5%, respectively. Locus is also capable of locating the inducing changes within top 5 for 41.0% of the bugs. The results show that Locus can significantly reduce the number of lines needing to be scanned to locate the bug compared with existing techniques.\n ","paperUrl":"http://home.cse.ust.hk/~mwenaa/paper/ASE16-Locus.pdf","projectUrl":"http://www.cse.ust.hk/~mwenaa/Locus.html","bibtex":"@inproceedings{Wei_ASE16,\n\tauthor = {Ming Wen and Rongxin Wu and\n\t\t \t Shing{-}Chi Cheung},\n\ttitle = {Locus: Locating Bugs from Software Changes},\n\tbooktitle = {Proceedings of the 31st IEEE/ACM International Conference on Automated Software Engineering, {ASE} 2016},\n\tyear = {2016}\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Context-Aware Patch Generation for Better Automated Program Repair","date":"2018","authors":["Ming Wen","Junjie Chen","Rongxin Wu","Dan Hao","Shing-Chi Cheung"],"venue":"International Conference on Software Engineering, Technical Research Paper, Gothenburg, Sweden, May 27 - 3 June 2018","venueShort":"ICSE","tags":[],"abstract":"\n The effectiveness of search-based automated program repair is limited in the number of correct patches that can be successfully generated.\nThere are two causes of such limitation. \nFirst, the search space does not contain the correct patch. 
\nSecond, the search space is huge and therefore the correct patch cannot be generated (ie correct patches are either generated after incorrect plausible ones or not generated within the time budget).\n\nTo increase the likelihood of including the correct patches in the search space, we propose to work at a fine granularity in terms of AST nodes.\nThis, however, will further enlarge the search space, increasing the challenge to find the correct patches.\nWe address the challenge by devising a strategy to prioritize the candidate patches based on their likelihood of being correct.\nSpecifically, we study the use of AST nodes' context information to estimate the likelihood.\n\nIn this paper, we propose CapGen, a context-aware patch generation technique.\nThe novelty which allows CapGen to produce more correct patches lies in three aspects:\n(1) The fine-granularity design enables it to find more correct fixing ingredients;\n(2) The context-aware prioritization of mutation operators enables it to constrain the search space;\n(3) Three context-aware models enable it to rank correct patches at high positions before incorrect plausible ones.\nWe evaluate CapGen on Defects4J and compare it with the state-of-the-art program repair techniques.\nOur evaluation shows that CapGen outperforms and complements existing techniques.\nCapGen achieves a high precision of 84.00% and can prioritize the correct patches before 98.78% of the incorrect plausible ones.\n ","paperUrl":"materials/Repair.pdf","bibtex":"@inproceedings {ICSE18,\n title = {{Context-Aware Patch Generation for Better Automated Program Repair}},\n author = {Ming, Wen and Junjie, Chen and Rongxin, Wu and Dan, Hao and Shing-Chi, Cheung},\n booktitle = {{Proceedings of the 40th International Conference on Software Engineering}},\n series = {ICSE 2016},\n year = {2018},\n doi = {10.1145/3180155.3180233},\n url = 
{http://home.cse.ust.hk/~mwenaa/paper/Repair.pdf},\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"How Well Do Change Sequences Predict Defects? Sequence Learning from Software Changes","date":"2020","authors":["Ming Wen","Rongxin Wu","Shing-Chi Cheung"],"venue":"IEEE Transactions on Software Engineering 2020","venueShort":"TSE","tags":["Software Analytics","Defect Prediction"],"abstract":"\n Software defect prediction, which aims to identify defective modules, can assist developers in finding bugs and prioritizing limited quality assurance resources. Various features to build defect prediction models have been proposed and evaluated. Among them, process metrics are one important category. Yet, existing process metrics are mainly encoded manually from change histories and ignore the sequential information arising from the changes during software evolution. Unlike traditional process metrics used for existing defect prediction models, change sequences are mostly vectors of variable length. This makes it difficult to apply such sequences directly in prediction models that are driven by conventional classifiers. To resolve this challenge, we utilize Recurrent Neural Network (RNN), which is a deep learning technique, to encode features from sequence data automatically. In this paper, we propose a novel approach called Fences, which extracts six types of change sequences covering different aspects of software changes via fine-grained change analysis. It approaches defects prediction by mapping it to a sequence labeling problem solvable by RNN. Our evaluations on 10 open source projects show that Fences can predict defects with high performance. 
Fences also outperforms the state-of-the-art technique which learns semantic features automatically from static code via deep learning.\n ","paperUrl":"materials/TSE19-ming.pdf","bibtex":"@article{DBLP:journals/tse/WenWC20,\n author = {Ming Wen and\n Rongxin Wu and\n Shing{-}Chi Cheung},\n title = {How Well Do Change Sequences Predict Defects? Sequence Learning from\n Software Changes},\n journal = {{IEEE} Trans. Software Eng.},\n volume = {46},\n number = {11},\n pages = {1155--1175},\n year = {2020},\n url = {https://doi.org/10.1109/TSE.2018.2876256},\n doi = {10.1109/TSE.2018.2876256},\n timestamp = {Tue, 02 Feb 2021 18:29:15 +0100},\n biburl = {https://dblp.org/rec/journals/tse/WenWC20.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Exploring and Exploiting the Correlations between Bug-Inducing and Bug-Fixing Commits","date":"2019","authors":["Ming Wen","Rongxin Wu","Yepang Liu","Yongqiang Tian","Xuan Xie","Shing-Chi Cheung","Zhendong Su"],"venue":"The ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering 2019, Technical Research Paper, Tallinn, Estonia","venueShort":"ESEC/FSE","tags":[],"abstract":"\n Bug-inducing commits provide important information to understand when and how bugs were introduced.\n\t\t\t\tTherefore, they have been extensively investigated by existing studies and frequently leveraged to facilitate bug fixings in industrial practices.\n\n\t\t\t\tDue to the importance of bug-inducing commits in software debugging,\n\t\t\t\twe are motivated to conduct the first systematic empirical study to explore the correlations between bug-inducing and bug-fixing commits in terms of code elements and modifications.\n\t\t\t\tTo facilitate the study, we collected the inducing and fixing commits for 333 bugs from seven large open-source projects.\n\t\t\t\tThe empirical findings reveal important and 
significant correlations between a bug's inducing and fixing commits.\n\t\t\t\tWe further exploit the usefulness of such correlation findings from two aspects.\n\t\t\t\tFirst, they explain why the SZZ algorithm, the most widely-adopted approach to collecting bug-inducing commits, is imprecise.\n\t\t\t\tIn view of SZZ's imprecision, we revisited the findings of previous studies based on SZZ,\n\t\t\t\tand found that 8 out of 10 previous findings are significantly affected by SZZ's imprecision.\n\t\t\t\tSecond, they shed lights on the design of automated debugging techniques.\n\t\t\t\tFor demonstration, we designed approaches that exploit the correlations with respect to statements and change actions.\n\t\t\t\tOur experiments on \textsc{Defects4J} show that our approaches can boost the performance of fault localization significantly and also advance existing APR techniques.\n ","paperUrl":"materials/FSE19-ming.pdf","projectUrl":"https://github.com/justinwm/InduceBenchmark","bibtex":"@inproceedings{wen2019exploring,\n title={Exploring and Exploiting the Correlations between Bug-Inducing and Bug-Fixing Commits.},\n author={Wen, Ming and Wu, Rongxin and Liu, Yepang and Tian, Yongqiang and Xie, Xuan and Cheung, Shing-Chi and Su, Zhendong},\n booktitle={Proceedings of the 2019 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering},\n to appear,\n year={2019},\n organization={ACM}\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Exposing Library API Misuses via Mutation Analysis","date":"2019","authors":["Ming Wen","Yepang Liu","Rongxin Wu","Xuan Xie","Shing-Chi Cheung","Zhendong Su"],"venue":"International Conference on Software Engineering 2019, Technical Research Paper, Montréal, QC, Canada, 25 May - 31 May","venueShort":"ICSE","tags":[],"abstract":"\n Misuses of library APIs are pervasive and often lead to software crashes and vulnerability issues. 
Various static analysis tools have been proposed to detect library API misuses. They often involve mining frequent patterns from a large number of correct API usage examples, which can be hard to obtain in practice. They also suffer from low precision due to an over-simplified assumption that a deviation from frequent usage patterns indicates a misuse.\n\t\t\t\tWe make two observations on the discovery of API misuse patterns. First, API misuses can be represented as mutants of the corresponding correct usages. Second, whether a mutant will introduce a misuse can be validated via executing it against a test suite and analyzing the execution information. Based on these observations, we propose MUTAPI, the first approach to discovering API misuse patterns via mutation analysis. To effectively mimic API misuses based on correct usages, we first design eight effective mutation operators inspired by the common characteristics of API misuses. MUTAPI generates mutants by applying these mutation operators on a set of client projects and collects mutant-killing tests as well as the associated stack traces. Misuse patterns are discovered from the killed mutants that are prioritized according to their likelihood of causing API misuses based on the collected information. We applied MUTAPI on 16 client projects with respect to 73 popular Java APIs. The results show that MUTAPI is able to discover substantial API misuse patterns with a high precision of 0.78. 
It also achieves a recall of 0.49 on the MUBENCH benchmark, which outperforms the state-of-the-art techniques.\n ","paperUrl":"materials/ICSE19-ming.pdf","bibtex":"@inproceedings {WEN2019API,\n title = {{Exposing Library API Misuses via Mutation Analysis}},\n author = {Ming, Wen and Yepang, Liu and Rongxin, Wu and Xuan, Xie and Shing-Chi, Cheung and Zhendong, Su},\n booktitle = {{Proceedings of the 41th International Conference on Software Engineering}},\n series = {ICSE 2019},\n year = {2019},\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Historical Spectrum based Fault Localization","date":"2021","authors":["Ming Wen","Junjie Chen","Yongqiang Tian","Rongxin Wu","Dan Hao","Shi Han","Shing-Chi Cheung"],"venue":"Transactions of Software Engineering 2021","venueShort":"TSE","tags":[],"abstract":"\n Spectrum-based fault localization (SBFL) techniques are widely studied and have been evaluated to be effective in locating faults. Recent studies also showed that developers from industry value automated SBFL techniques. However, their effectiveness is still limited by two main reasons. First, the test coverage information leveraged to construct the spectrum does not reflect the root cause directly. Second, SBFL suffers from the tie issue so that the buggy code entities can not be well differentiated from non-buggy ones. To address these challenges, we propose to leverage the information of version histories in fault localization based on the following two intuitions. First, version histories record how bugs are introduced to software projects and this information reflects the root cause of bugs directly. Second, the evolution histories of code can help differentiate those suspicious code entities ranked in tie by SBFL. 
Our intuitions are also inspired by the observations on debugging practices from large open source projects and industry.\n\t\t\t\tBased on the intuitions, we propose a novel technique HSFL (historical spectrum based fault localization). Specifically, HSFL identifies bug-inducing commits from the version history in the first step. It then constructs historical spectrum (denoted as Histrum) based on bug-inducing commits, which is another dimension of spectrum orthogonal to the coverage based spectrum used in SBFL. HSFL finally ranks the suspicious code elements based on our proposed Histrum and the conventional spectrum. HSFL outperforms the state-of-the-art SBFL techniques significantly on the Defects4J benchmark. Specifically, it locates and ranks the buggy statement at Top-1 for 77.8% more bugs as compared with SBFL, and 33.9% more bugs at Top-5. Besides, for the metrics MAP and MRR, HSFL achieves an average improvement of 28.3% and 40.8% over all bugs, respectively. Moreover, HSFL can also outperform other six families of fault localization techniques, and our proposed Histrum model can be integrated with different families of techniques and boost their performance.\n ","paperUrl":"materials/TSE20-ming.pdf","projectUrl":"https://github.com/justinwm/HSFL/","bibtex":"@article{WEN_TSE21,\n author = {Ming, Wen and Junjie, Chen and Yongqiang, Tian and Rongxin, Wu and Dan, Hao and Shi, Han and Shing-Chi, Cheung},\n title = {Historical Spectrum based Fault Localization},\n journal = {{IEEE} Trans. 
Software Eng.},\n volume = {47},\n number = {11},\n pages = {2348--2368},\n year = {2021},\n url = {https://doi.org/10.1109/TSE.2019.2948158},\n doi = {10.1109/TSE.2019.2948158}\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"CrashLocator: Locating Crashing Faults based on Crash Stacks","date":"2014","authors":["Rongxin Wu","Hongyu Zhang","Shing-Chi Cheung","Sunghun Kim"],"venue":"International Symposium on Software Testing and Analysis (ISSTA 2014), San Jose, California, USA, July 2014","venueShort":"ISSTA","tags":["Fault Localization"],"awards":["Distinguished Paper"],"abstract":"\n Software crash is common. When a crash occurs, software developers can receive a report upon user permission. A crash report typically includes a call stack at the time of crash. An important step of debugging a crash is to identify faulty functions, which is often a tedious and labor-intensive task. In this paper, we propose CrashLocator, a method to locate faulty functions using the crash stack information in crash reports. It deduces possible crash traces (the failing execution traces that lead to crash) by expanding the crash stack with functions in static call graph. It then calculates the suspiciousness of each function in the approximate crash traces. The functions are then ranked by their suspiciousness scores and are recommended to developers for further investigation. We evaluate our approach using real-world Mozilla crash data. The results show that our approach is effective: we can locate 50.6%, 63.7% and 67.5% of crashing faults by examining top 1, 5 and 10 functions recommended by CrashLocator, respectively. 
Our approach outperforms the conventional stack-only methods significantly.\n ","paperUrl":"http://dl.acm.org/citation.cfm?doid=2610384.2610386","bibtex":"@inproceedings{DBLP:conf/issta/WuZCK14,\n author = {Rongxin Wu and\n Hongyu Zhang and\n Shing{-}Chi Cheung and\n Sunghun Kim},\n title = {CrashLocator: locating crashing faults based on crash stacks},\n booktitle = {International Symposium on Software Testing and Analysis, {ISSTA}\n '14, San Jose, CA, {USA} - July 21 - 26, 2014},\n pages = {204--214},\n year = {2014},\n crossref = {DBLP:conf/issta/2014},\n url = {http://doi.acm.org/10.1145/2610384.2610386},\n doi = {10.1145/2610384.2610386},\n timestamp = {Sun, 13 Jul 2014 13:49:26 +0200},\n biburl = {http://dblp.uni-trier.de/rec/bib/conf/issta/WuZCK14},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null},{"title":"Casper: An Efficient Approach to Call Trace Collection","date":"2016","authors":["Rongxin Wu","Xiao Xiao","Shing-Chi Cheung","Hongyu Zhang","Charles Zhang"],"venue":"43rd ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages (POPL 2016)","venueShort":"POPL","tags":[],"abstract":"\n Call traces, i.e., sequences of function calls and returns, are fundamental to a wide range of program analyses such as bug reproduction, fault diagnosis, performance analysis, and many others. The conventional approach to collect call traces that instruments each function call and return site incurs large space and time overhead. Our approach aims at reducing the recording overheads by instrumenting only a small amount of call sites while keeping the capability of recovering the full trace. We propose a call trace model and a logged call trace model based on an LL(1) grammar, which enables us to define the criteria of a feasible solution to call trace collection. Based on the two models, we prove that to collect call traces with minimal instrumentation is an NP-hard problem. 
We then propose an efficient approach to obtaining a suboptimal solution. We implemented our approach as a tool Casper and evaluated it using the DaCapo benchmark suite. The experiment results show that our approach causes significantly lower runtime (and space) overhead than two state-of-the-arts approaches.\n ","paperUrl":"http://home.cse.ust.hk/~wurongxin/files/wurongxin_popl2016.pdf","bibtex":"@inproceedings{Wu_POPL2016,\n author = {Rongxin Wu and\n Xiao Xiao and\n Shing{-}Chi Cheung and\n Hongyu Zhang and\n Charles Zhang},\n title = {Casper: an efficient approach to call trace collection},\n booktitle = {Proceedings of the 43rd Annual {ACM} {SIGPLAN-SIGACT} Symposium on\n Principles of Programming Languages, {POPL} 2016, St. Petersburg,\n FL, USA, January 20 - 22, 2016},\n pages = {678--690},\n year = {2016},\n crossref = {DBLP:conf/popl/2016},\n url = {http://doi.acm.org/10.1145/2837614.2837619},\n doi = {10.1145/2837614.2837619},\n timestamp = {Wed, 09 Mar 2016 08:11:59 +0100},\n biburl = {http://dblp.uni-trier.de/rec/bib/conf/popl/WuXCZZ16},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"ChangeLocator: Locate Crash-Inducing Changes Based on Crash Reports","date":"2018","authors":["Rongxin Wu","Ming Wen","Shing-Chi Cheung","Hongyu Zhang"],"venue":"Journal of Empirical Software Engineering (EmSE 2018)","venueShort":"EmSE","tags":[],"abstract":"\n Software crashes are severe manifestations of software bugs. Debugging crashing bugs is tedious and time-consuming. Understanding software changes that induce a crashing bug can provide useful contextual information for bug fixing and is highly demanded by developers. Locating the bug inducing changes is also useful for automatic program repair, since it narrows down the root causes and reduces the search space of bug fix location. 
However, currently there are no systematic studies on locating the software changes to a source code repository that induce a crashing bug reflected by a bucket of crash reports. To tackle this problem, we first conducted an empirical study on characterizing the bug inducing changes for crashing bugs (denoted as crash-inducing changes). We also propose ChangeLocator, a method to automatically locate crash-inducing changes for a given bucket of crash reports. We base our approach on a learning model that uses features originated from our empirical study and train the model using the data from the historical fixed crashes. We evaluated ChangeLocator with six release versions of Netbeans project. The results show that it can locate the crash-inducing changes for 44.7%, 68.5%, and 74.5% of the bugs by examining only top 1, 5 and 10 changes in the recommended list, respectively. It significantly outperforms the existing state-of-the-art approach.\n ","paperUrl":"materials/ChangeLocator.pdf","bibtex":"@article{wu2018changelocator,\n title={ChangeLocator: locate crash-inducing changes based on crash reports},\n author={Wu, Rongxin and Wen, Ming and Cheung, Shing-Chi and Zhang, Hongyu},\n journal={Empirical Software Engineering},\n volume={23},\n number={5},\n pages={2866--2900},\n year={2018},\n publisher={Springer}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"How Effectively can Spreadsheet Anomalies be Detected: An Empirical Study","date":"2017","authors":["Ruiqing Zhang","Chang Xu","Shing-Chi Cheung","Ping Yu","Xiaoxing Ma","Jian Lu"],"venue":"The Journal of Systems and Software (JSS)","venueShort":"JSS ","tags":[],"abstract":"\n While spreadsheets are widely used, they have been found to be error-prone. Various techniques have been proposed to detect anomalies in spreadsheets, with varying scopes and effectiveness. Nevertheless, there is no empirical study comparing these techniques' practical usefulness and effectiveness. 
In this work, we conducted a large-scale empirical study of three state-of-the-art techniques on their effectiveness in detecting spreadsheet anomalies. Our study focused on the precision, recall rate, efficiency and scope. We found that one technique outperforms the other two in precision and recall rate of spreadsheet anomaly detection. Efficiency of the three techniques is acceptable for most spreadsheets, but they may not be scalable to large spreadsheets with complex formulas. Besides, they have different scopes for detecting different spreadsheet anomalies, thus complementing to each other. We also discussed limitations of these three techniques. Based on our findings, we give suggestions for future spreadsheet research.\n ","paperUrl":"http://cs.nju.edu.cn/_upload/tpl/01/55/341/template341/1_publications/JSS16.pdf","bibtex":"@article{Zhang_JSS17,\n\tauthor = {Ruiqing Zhang, Chang Xu, Shing-Chi Cheung, Ping Yu, Xiaoxing Ma and Jian Lu},\n\ttitle = {How Effective can Spreadsheet Anomalies be Detected: An Empirical Study},\n\tjournal = {The Journal of Systems and Software (JSS)},\n\tyear = {2017}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Automatic Detection and Update Suggestion for Outdated API Names in Documentation","date":"2021","authors":["Seonah Lee","Rongxin Wu","Shing-Chi Cheung","Sungwon Kang"],"venue":"IEEE Transactions on Software Engineering 2021","venueShort":"TSE","tags":["Software Analytics","API Misuse"],"abstract":"\n Application programming interfaces (APIs) continually evolve to meet ever-changing user needs, and documentation provides an authoritative reference for their usage. However, API documentation is commonly outdated because nearly all of the associated updates are performed manually. Such outdated documentation, especially with regard to API names, causes major software development issues. In this paper, we propose a method for automatically updating outdated API names in API documentation. 
Our insight is that API updates in documentation can be derived from API implementation changes between code revisions. To evaluate the proposed method, we applied it to four open source projects. Our evaluation results show that our method, FreshDoc, detects outdated API names in API documentation with 48% higher accuracy than the existing state-of-the-art methods do. Moreover, when we checked the updates suggested by FreshDoc against the developers? manual updates in the revised documentation, FreshDoc addressed 82% of the outdated names. When we reported 40 outdated API names found by FreshDoc via issue tracking systems, developers accepted 75% of the suggestions. These evaluation results indicate that FreshDoc can be used as a practical method for the detection and updating of API names in the associated documentation.\n ","paperUrl":"materials/TSE19-lee.pdf","bibtex":"@article{DBLP:journals/tse/LeeWCK21,\n author = {Seonah Lee and\n Rongxin Wu and\n Shing{-}Chi Cheung and\n Sungwon Kang},\n title = {Automatic Detection and Update Suggestion for Outdated {API} Names\n in Documentation},\n journal = {{IEEE} Trans. Software Eng.},\n volume = {47},\n number = {4},\n pages = {653--675},\n year = {2021},\n url = {https://doi.org/10.1109/TSE.2019.2901459},\n doi = {10.1109/TSE.2019.2901459},\n timestamp = {Thu, 29 Apr 2021 15:14:58 +0200},\n biburl = {https://dblp.org/rec/journals/tse/LeeWCK21.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Automatic Spreadsheet Cell Clustering and Smell Detection Using Strong and Weak Features","date":"2016","authors":["Shing-Chi Cheung","Wanjun Chen","Yepang Liu","Chang Xu"],"venue":"38th International Conference on Software Engineering (ICSE 2016), Austin, TX, USA, May 2016","venueShort":"ICSE","tags":[],"abstract":"\n Various techniques have been proposed to detect smells in spreadsheets, which are susceptible to errors. 
These techniques typically detect spreadsheet smells through a mechanism based on a fixed set of patterns or metric thresholds. Unlike conventional programs, tabulation styles vary greatly across spreadsheets. Smell detection based on fixed patterns or metric thresholds, which are insensitive to the varying tabulation styles, can miss many smells in one spreadsheet while reporting many spurious smells in another. In this paper, we propose CUSTODES to effectively cluster spreadsheet cells and detect smells in these clusters. The clustering mechanism can automatically adapt to the tabulation styles of each spreadsheet using strong and weak features. These strong and weak features capture the invariant and variant parts of tabulation styles, respectively. As smelly cells in a spreadsheet normally occur in minority, they can be mechanically detected as clusters' outliers in feature spaces. We implemented and applied CUSTODES to 70 spreadsheets files randomly sampled from the EUSES corpus. These spreadsheets contain 1,610 formula cell clusters. Experimental results confirmed that CUSTODES is effective. 
It successfully detected harmful smells that can induce computation anomalies in spreadsheets with an F-measure of 0.72, outperforming state-of-the-art techniques.\n ","paperUrl":"http://doi.acm.org/10.1145/2884781.2884796","projectUrl":"http://sccpu2.cse.ust.hk/custodes/","slidesUrl":"http://sccpu2.cse.ust.hk/castle/materials/Custodes.4.pdf","bibtex":"@inproceedings{Cheung_ICSE2016,\n author = {Shing{-}Chi Cheung and\n Wanjun Chen and\n Yepang Liu and\n Chang Xu},\n title = {{CUSTODES:} automatic spreadsheet cell clustering and smell detection\n using strong and weak features},\n booktitle = {Proceedings of the 38th International Conference on Software Engineering,\n {ICSE} 2016, Austin, TX, USA, May 14-22, 2016},\n pages = {464--475},\n year = {2016},\n crossref = {DBLP:conf/icse/2016},\n url = {http://doi.acm.org/10.1145/2884781.2884796},\n doi = {10.1145/2884781.2884796},\n timestamp = {Sun, 15 May 2016 11:55:22 +0200},\n biburl = {http://dblp.uni-trier.de/rec/bib/conf/icse/CheungCLX16},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"awards":[]},{"title":"Automating Object Transformations for Dynamic Software Updating via Online Execution Synthesis","date":"2018","authors":["Tianxiao Gu","Xiaoxing Ma","Chang Xu","Yanyan Jiang","Chun Cao","Jian Lu"],"venue":"32nd European Conference on Object-Oriented Programming (ECOOP 2018), Article 19","venueShort":"ECOOP","tags":[],"abstract":"\n Dynamic software updating (DSU) is a technique to upgrade a running software system on the fly without stopping the system. During updating, the runtime state of the modified components of the system needs to be properly transformed into a new state, so that the modified components can still correctly interact with the rest of the system. However, the transformation is non-trivial to realize due to the gap between the low-level implementations of two versions of a program. 
This paper presents AOTES, a novel approach to automating object transformations for dynamic updating of Java programs. AOTES bridges the gap by abstracting the old state of an object to a history of method invocations, and re-invoking the new version of all methods in the history to get the desired new state. AOTES requires no instrumentation to record any data and thus has no overhead during normal execution. We propose and implement a novel technique that can synthesize an equivalent history of method invocations based on the current object state only. We evaluated AOTES on software updates taken from Apache Commons Collections, Tomcat, FTP Server and SSHD Server. Experimental results show that AOTES successfully handled 51 of 61 object transformations of 21 updated classes, while two state-of-the-art approaches only handled 11 and 6 of 61, respectively.\n ","paperUrl":"https://cs.nju.edu.cn/changxu/1_publications/ECOOP18.pdf","bibtex":"@inproceedings{gu_automating_2018,\n author = {Tianxiao Gu and Xiaoxing Ma and Chang Xu and Yanyan Jiang and Chun Cao and Jian Lu},\n title = {Automating object transformations for dynamic software updating via online execution synthesis},\n pages = {to appear},\n year = {2018},\n booktitle = {Proceedings of the 32nd European Conference on Object-Oriented Programming (ECOOP)},\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"RECONTEST: Effective Regression Testing of Concurrent Programs","date":"2015","authors":["Valerio Terragni","Shing-Chi Cheung","Charles Zhang"],"venue":"37th International Conference on Software Engineering (ICSE 2015), Florence, Italy, May 16-24, 2015","venueShort":"ICSE","tags":[],"abstract":"\n Concurrent programs proliferate as multi-core technologies advance. As a result, the conventional approach that selects a sub-set of test cases for regression testing without considering interleavings is insufficient. 
In this paper we present RECONTEST to address the problem by selecting the new interleavings that arise due to code changes. These interleavings must be explored in order to uncover regression bugs. RECONTEST efficiently selects new interleavings by first identifying shared memory accesses that are affected by the changes, and then exploring only those problematic interleavings that contain at least one of these accesses. We have implemented RECONTEST as an automated tool and evaluated it using 13 real-world concurrent program subjects. Our results show that RECONTEST can significantly reduce the regression testing cost without missing any faulty interleavings induced by code changes.\n ","paperUrl":"http://home.cse.ust.hk/~vterragni/files/Terragni_ICSE2015.pdf","bibtex":"@inproceedings{TERRAGNI_ICSE15,\n author = {Valerio Terragni and\n Shing{-}Chi Cheung and\n Charles Zhang},\n title = {{RECONTEST:} Effective Regression Testing of Concurrent Programs},\n booktitle = {37th {IEEE/ACM} International Conference on Software Engineering,\n {ICSE} 2015, Florence, Italy, May 16-24, 2015, Volume 1},\n pages = {246--256},\n year = {2015},\n url = {http://dx.doi.org/10.1109/ICSE.2015.45},\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"CSNIPPEX: Automated Synthesis of Compilable Code Snippets from Q&A Sites","date":"2016","authors":["Valerio Terragni","Yepang Liu","Shing-Chi Cheung"],"venue":"Proceedings of the 25th International Symposium on Software Testing and Analysis (ISSTA 2016), Saarbrücken, Germany, July 2016","venueShort":"ISSTA","tags":[],"abstract":"\n Popular Q&A sites like StackOverflow have collected numerous code snippets. However, many of them do not have complete type information, making them uncompilable and inapplicable to various software engineering tasks. 
This paper analyzes this problem, and proposes a technique CSNIPPEX to automatically convert code snippets into compilable Java source code files by resolving external dependencies, generating import declarations, and fixing syntactic errors. We implemented CSNIPPEX as a plug-in for Eclipse and evaluated it with 242,175 StackOverflow posts that contain code snippets. CSNIPPEX successfully synthesized compilable Java files for 40,410 of them. It was also able to effectively recover import declarations for each post with a precision of 91.04% in a couple of seconds.\n ","paperUrl":"http://www.cse.ust.hk/~vterragni/files/Terragni_ISSTA2016.pdf","bibtex":"@inproceedings{Terragni_ISSTA16,\n author = {Valerio Terragni and Yepang Liu and\n Shing{-}Chi Cheung},\n title = {CSNIPPEX: Automated Synthesis of Compilable Code Snippets from Q&A Sites},\n booktitle = {Proceedings of the 2016 International Symposium on Software Testing\n and Analysis, {ISSTA} 2016},\n pages = {118--129},\n year = {2016},\n url = {http://dx.doi.org/10.1145/2931037.2931058}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Coverage-Driven Test Code Generation for Concurrent Classes","date":"2016","authors":["Valerio Terragni","Shing-Chi Cheung"],"venue":"38th International Conference on Software Engineering (ICSE 2016), Austin, TX, USA, May 2016","venueShort":"ICSE","tags":[],"abstract":"\n Previous techniques on concurrency testing have mainly focused on exploring the interleaving space of manually written test code to expose faulty interleavings of shared memory accesses. These techniques assume the availability of failure-inducing tests. In this paper, we present AutoConTest, a coverage-driven approach to generate effective concurrent test code that achieve high interleaving coverage. AutoConTest consists of three components. 
First, it computes the coverage requirements dynamically and iteratively during sequential test code generation, using a coverage metric that captures the execution context of shared memory accesses. Second, it smartly selects these sequential codes based on the computed result and assembles them for concurrent tests, achieving increased context-sensitive interleaving coverage. Third, it explores the newly covered interleavings. We have implemented AutoConTest as an automated tool and evaluated it using 6 real-world concurrent Java subjects. The results show that AutoConTest is able to generate effective concurrent tests that achieve high interleaving coverage and expose concurrency faults quickly. AutoConTest took less than 65 seconds (including program analysis, test generation and execution) to expose the faults in the program subjects.\n ","paperUrl":"http://www.cse.ust.hk/~vterragni/files/Terragni_ICSE2016.pdf","bibtex":"@inproceedings{Terragni_ICSE16,\n author = {Valerio Terragni and\n Shing{-}Chi Cheung},\n title = {Coverage-driven test code generation for concurrent classes},\n booktitle = {Proceedings of the 38th International Conference on Software Engineering,\n {ICSE} 2016, Austin, TX, USA, May 14-22, 2016},\n pages = {1121--1132},\n year = {2016},\n url = {http://doi.acm.org/10.1145/2884781.2884876}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"File-level socio-technical congruence and its relationship with bug proneness in OSS projects","date":"2019","authors":["Weiqiang Zhang","Shing-Chi Cheung","Zhenyu Chen","Yuming Zhou","Bin Luo"],"venue":"Journal of Systems and Software 156: 21-40 (2019)","venueShort":"JSS","tags":[],"abstract":"\n Coordination is important in software development. Socio-Technical Congruence (STC) is proposed to measure the match between coordination requirements and actual coordination activities. The previous work of Cataldo et al. 
computes STC in commercial projects and finds it related to software failures. In this paper, we study the relationship between file-level STC and bug proneness in Open Source Software (OSS) projects. We apply the fundamental STC framework to the OSS data setting and present a method of computing file-level STC based on our available data. We also propose a derivative STC metric called Missing Developer Links (MDL), which is to measure the amount of coordination breakdowns. In our empirical analysis on five OSS projects, we find that MDL is more related to bug proneness than STC. Furthermore, STC or MDL can be computed based on different types of file networks and developer networks, and we find out the best file network and the best developer network via an empirical study. We also evaluate the usefulness of STC or MDL metrics in bug prediction. This work is promising to help detect coordination issues in OSS projects.\n ","paperUrl":"https://www.sciencedirect.com/science/article/pii/S0164121219301177","bibtex":"@article{DBLP:journals/jss/ZhangCCZL19,\n author = {Weiqiang Zhang and\n Shing{-}Chi Cheung and\n Zhenyu Chen and\n Yuming Zhou and\n Bin Luo},\n title = {File-level socio-technical congruence and its relationship with bug\n proneness in {OSS} projects},\n journal = {Journal of Systems and Software},\n volume = {156},\n pages = {21--40},\n year = {2019},\n url = {https://doi.org/10.1016/j.jss.2019.05.030},\n doi = {10.1016/j.jss.2019.05.030},\n timestamp = {Thu, 05 Sep 2019 19:41:26 +0200},\n biburl = {https://dblp.org/rec/bib/journals/jss/ZhangCCZL19},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"A Survey on Dependability Improvement Techniques for Pervasive Computing Systems","date":"2015","authors":["Wenhua Yang","Yepang Liu","Chang Xu","Shing-Chi Cheung"],"venue":"SCIENCE CHINA Information Sciences (SCIS) 58(5), May 
2015","venueShort":"SCIS","tags":[],"abstract":"\n The goal of this survey is to summarize the state-of-the-art research results and identify research challenges of developing and deploying dependable pervasive computing systems. We discuss the factors that affect the system dependability and the studies conducted to improve it with respect to these factors. These studies were categorized according to their similarities and differences in hope of shedding some insight into future research. There are three categories: context management, fault detection, and uncertainty handling. These three categories of work address the three most difficult problems of pervasive computing systems. First, pervasive computing systems’ perceived environments, which are also called their contexts, can vary intensively, and thus have a great impact on the systems’ dependability. Second, it is challenging to guarantee the correctness of the systems’ internal computations integrated with interactions with external environments for developers. Fault detection is then an important issue for improving dependability for these systems. Last but not least importantly, pervasive computing systems interact with their environments frequently. These interactions can be affected by many uncertainties, which can jeopardize the systems’ dependability. 
After a discussion of these pieces of work, we present an outlook for its future research directions.\n ","paperUrl":"http://link.springer.com/article/10.1007%2Fs11432-015-5300-3","bibtex":"@article{DBLP:journals/chinaf/YangLXC15,\n author = {Wenhua Yang and\n Yepang Liu and\n Chang Xu and\n Shing{-}Chi Cheung},\n title = {A survey on dependability improvement techniques for pervasive computing\n systems},\n journal = {{SCIENCE} {CHINA} Information Sciences},\n volume = {58},\n number = {5},\n pages = {1--14},\n year = {2015},\n url = {http://dx.doi.org/10.1007/s11432-015-5300-3},\n doi = {10.1007/s11432-015-5300-3},\n timestamp = {Wed, 29 Apr 2015 12:35:39 +0200},\n biburl = {http://dblp.uni-trier.de/rec/bib/journals/chinaf/YangLXC15},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Efficient Validation of Self-adaptive Applications by Counterexample Probability Maximization","date":"2018","authors":["Wenhua Yang","Chang Xu","Minxue Pan","Chun Cao","Xiaoxing Ma","Jian Lu"],"venue":"The Journal of Systems and Software (JSS)","venueShort":"JSS","tags":[],"abstract":"\n Self-adaptive applications’ executions can be affected by uncertainty factors like unreliable sensing and flawed adaptation and therefore often error-prone. Existing methods can verify the applications suffering uncertainty and report counterexamples. However, such verification results can deviate from reality when the uncertainty specification used in verification is itself imprecise. This thus calls for further validation of reported counterexamples. One outstanding challenge in counterexample validation is that the probabilities of counterexamples occurring in real environment are usually very low, which makes the validation extremely inefficient. In this paper, we propose a novel approach to systematically deriving path-equivalent counterexamples with respect to original ones. 
The derived counterexamples guarantee to have higher probabilities, making them capable of being validated efficiently in field test. We evaluated our approach with real-world self-adaptive applications. The results reported that our approach significantly increased counterexample probabilities, and the derived counterexamples were also consistently and efficiently validated in both real environment and simulation.\n ","paperUrl":"https://cs.nju.edu.cn/changxu/1_publications/JSS18.pdf","bibtex":"@inproceedings{yang2018jss,\n\tauthor = {Wenhua Yang, Chang Xu, Minxue Pan, Chun Cao, Xiaoxing Ma, and Jian Lu},\n\ttitle = {The Journal of Systems and Software (JSS)},\n\tyear = {2018},\n\tpages = {82-99}\n\t}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Improving Verification Accuracy of CPS by Modeling and Calibrating Interaction Uncertainty","date":"2018","authors":["Wenhua Yang","Chang Xu","Minxue Pan","Xiaoxing Ma","Jian Lu"],"venue":"ACM Transactions on Internet Technology (TOIT)","venueShort":"TOIT","tags":[],"abstract":"\n Cyber-Physical Systems (CPS) intrinsically combine hardware and physical systems with software and network, which are together creating complex and correlated interactions. CPS applications often experience uncertainty in interacting with environment through unreliable sensor. They can be faulty and exhibit runtime errors if developers have not considered environmental interaction uncertainty adequately. Existing work in verifying CPS applications ignores interaction uncertainty and thus may overlook uncertainty-related faults. To improve verification accuracy, in this article we propose a novel approach to verifying CPS applications with explicit modeling of uncertainty arisen in the interaction between them and the environment. Our approach builds an Interactive State Machine (ISM) network for a CPS application and models interaction uncertainty by error ranges and distributions. 
Then it encodes both the application and uncertainty models to SMT formula to leverage SMT solvers searching for counterexamples that represent application failures. The precision of uncertainty model can affect the verification results. However, it may be difficult to model interaction uncertainty precisely enough at the beginning, because of the uncontrollable noise of sensors and insufficient data sample size. To further improve the accuracy of the verification results, we propose an approach to identifying and calibrating imprecise uncertainty models. We exploit the inconsistency between the counterexamples’ estimate and actual occurrence probabilities to identify possible imprecision in uncertainty models, and the calibration of imprecise models is to minimize the inconsistency, which is reduced to a Search- Based Software Engineering (SBSE) problem. We experimentally evaluated our verification and calibration approaches with real-world CPS applications, and the experimental results confirmed their effectiveness and efficiency.\n ","paperUrl":"https://cs.nju.edu.cn/changxu/1_publications/TOIT18.pdf","bibtex":"@inproceedings{yang2018toit,\n\tauthor = {Wenhua Yang, Chang Xu, Minxue Pan, Xiaoxing Ma, and Jian Lu},\n\ttitle = {Improving Verification Accuracy of CPS by Modeling and Calibrating Interaction Uncertainty},\n\tjournal = {ACM Transactions on Internet Technology (TOIT)},\n\tyear = {2018},\n\tpages = {1-37}\n\t}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Is Spreadsheet Ambiguity Harmful? Detecting and Repairing Spreadsheet Smells due to Ambiguous Computation","date":"2014","authors":["Wensheng Dou","Shing-Chi Cheung","Jun Wei"],"venue":"36th International Conference on Software Engineering (ICSE 2014), Hyderabad, India, May-Jun 2014","venueShort":"ICSE","tags":[],"abstract":"\n Spreadsheets are widely used by end users for numerical computation in their business. 
Spreadsheet cells whose computation is subject to the same semantics are often clustered in a row or column. When a spreadsheet evolves, these cell clusters can degenerate due to ad hoc modifications or undisciplined copy-and-pastes. Such degenerated clusters no longer keep cells prescribing the same computational semantics, and are said to exhibit ambiguous computation smells. Our empirical study finds that such smells are common and likely harmful. We propose AmCheck, a novel technique that automatically detects and repairs ambiguous computation smells by recovering their intended computational semantics. A case study using AmCheck suggests that it is useful for discovering and repairing real spreadsheet problems.\n ","paperUrl":"http://dl.acm.org/citation.cfm?doid=2568225.2568316","bibtex":"@inproceedings{DBLP:conf/icse/DouCW14,\n author = {Wensheng Dou and\n Shing{-}Chi Cheung and\n Jun Wei},\n title = {Is spreadsheet ambiguity harmful? detecting and repairing spreadsheet\n smells due to ambiguous computation},\n booktitle = {36th International Conference on Software Engineering, {ICSE} '14,\n Hyderabad, India - May 31 - June 07, 2014},\n pages = {848--858},\n year = {2014},\n crossref = {DBLP:conf/icse/2014},\n url = {http://doi.acm.org/10.1145/2568225.2568316},\n doi = {10.1145/2568225.2568316},\n timestamp = {Mon, 14 Sep 2015 15:13:50 +0200},\n biburl = {http://dblp.uni-trier.de/rec/bib/conf/icse/DouCW14},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Detecting Table Clones and Smells in Spreadsheets","date":"2016","authors":["Wensheng Dou","Shing-Chi Cheung","Chushu Gao","Chang Xu","Liang Xu","Jun Wei"],"venue":"24th ACM SIGSOFT International Symposium on the Foundations of Software Engineering (FSE 2016), Seattle, WA, USA, Nov 2016","venueShort":"FSE ","tags":[],"abstract":"\n Spreadsheets are widely used by end users for various business tasks, such as data 
analysis and financial reporting. End users may perform similar tasks by cloning a block of cells (table) in their spreadsheets. The corresponding cells in these cloned tables are supposed to keep the same or similar computational semantics. However, when spreadsheets evolve, thus cloned tables can become inconsistent due to ad-hoc modifications, and as a result suffer from smells. In this paper, we propose TableCheck to detect table clones and related smells due to inconsistency among them. We observe that two tables with the same header information at their corresponding cells are likely to be table clones. Inspired by existing finger-print-based code clone detection techniques, we developed a detection algorithm to detect this kind of table clones. We further detected outliers among corresponding cells as smells in the detected table clones. We implemented our idea into TableCheck, and applied it to real-world spreadsheets from the EUSES corpus. Experimental results show that table clones commonly exist (21.8%), and 25.6% of the spreadsheets with table clones suffer from smells due to inconsistency among these clones. 
TableCheck detected table clones and their smells with a precision of 92.2% and 85.5%, respectively, while existing techniques detected no more than 35.6% true smells that TableCheck could detect.\n ","paperUrl":"http://sccpu2.cse.ust.hk/castle/materials/fse16main-mainid258-p-e95dd6b-29549-preprint.pdf","slidesUrl":"http://sccpu2.cse.ust.hk/castle/materials/TableCheck_2016_11-17-1.pdf","bibtex":"@inproceedings{Dou_FSE16,\n\tauthor = {Wensheng Dou and Shing{-}Chi Cheung and Chushu Gao and Chang Xu and Liang Xu and Jun Wei},\n\ttitle = {Detecting Table Clones and Smells in Spreadsheets},\n\tbooktitle = {Proceedings of the 2016 International Symposium on the Foundations of Software Engineering, {FSE} 2016},\n\tyear = {2016}\n}","arxivUrl":null,"projectUrl":null,"awards":[]},{"title":"VEnron: A Versioned Spreadsheet Corpus and Related Evolution Analysis","date":"2016","authors":["Wensheng Dou","Liang Xu","Shing-Chi Cheung","Chushu Gao","Jun Wei","Tao Huang"],"venue":"38th International Conference on Software Engineering (ICSE 2016 - SEIP), Companion Volume, Austin, TX, USA, May 2016","venueShort":"ICSE SEIP","tags":[],"abstract":"\n In this paper, we propose a semi-automated approach that leverages spreadsheets’ contexts (e.g., attached emails) and contents to identify evolved spreadsheets and recover the embedded version information. We apply it to the released email archive of the Enron Corporation and build an industrial-scale, versioned spreadsheet corpus VEnron. Our approach first clusters spreadsheets that likely evolved from one to another into evolution groups based on various fragmented information, such as spreadsheet filenames, spreadsheet contents, and spreadsheet-attached emails. Then, it recovers the version information of the spreadsheets in each evolution group. VEnron enables us to identify interesting issues that can arise from spreadsheet evolution. 
For example, the versioned spreadsheets popularly exist in the Enron email archive; changes in formulas are common; and some groups (16.9%) can introduce new errors during evolution.\nAccording to our knowledge, VEnron is the first spreadsheet corpus with version information. It provides a valuable resource to understand issues arising from spreadsheet evolution.\n ","paperUrl":"http://delivery.acm.org/10.1145/2890000/2889238/p162-dou.pdf?ip=175.159.126.8&id=2889238&acc=ACTIVE%20SERVICE&key=CDD1E79C27AC4E65%2EFC30B8D6EF32B758%2E4D4702B0C3E38B35%2E4D4702B0C3E38B35&CFID=836117825&CFTOKEN=34377724&__acm__=1473671848_9a79ceac0a81a74ac3ee0d6561cb8330","projectUrl":"http://sccpu2.cse.ust.hk/venron/","bibtex":"@inproceedings{Dou_ICSE2016,\n author = {Wensheng Dou and\n Liang Xu and\n Shing{-}Chi Cheung and\n Chushu Gao and\n Jun Wei and\n Tao Huang},\n title = {VEnron: a versioned spreadsheet corpus and related evolution analysis},\n booktitle = {Proceedings of the 38th International Conference on Software Engineering,\n {ICSE} 2016, Austin, TX, USA, May 14-22, 2016 - Companion Volume},\n pages = {162--171},\n year = {2016},\n crossref = {DBLP:conf/icse/2016c},\n url = {http://doi.acm.org/10.1145/2889160.2889238},\n doi = {10.1145/2889160.2889238},\n timestamp = {Sun, 15 May 2016 12:23:10 +0200},\n biburl = {http://dblp.uni-trier.de/rec/bib/conf/icse/DouXCGWH16},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"CACheck: Detecting and Repairing Cell Arrays in Spreadsheets","date":"2017","authors":["Wensheng Dou","Chang Xu","Shing-Chi Cheung","Jun Wei"],"venue":"IEEE Transactions on Software Engineering (TSE)","venueShort":"TSE","tags":[],"abstract":"\n Spreadsheets are widely used by end users for numerical computation in their business. Spreadsheet cells whose computation is subject to the same semantics are often clustered in a row or column as a cell array. 
When a spreadsheet evolves, the cells in a cell array can degenerate due to ad hoc modifications. Such degenerated cell arrays no longer keep cells prescribing the same computational semantics, and are said to exhibit ambiguous computation smells. We propose CACheck, a novel technique that automatically detects and repairs smelly cell arrays by recovering their intended computational semantics. Our empirical study on the EUSES and Enron corpora finds that such smelly cell arrays are common. Our study also suggests that CACheck is useful for detecting and repairing real spreadsheet problems caused by smelly cell arrays. Compared with our previous work AmCheck, CACheck detects smelly cell arrays with higher precision and recall rate.\n ","paperUrl":"https://doi.org/10.1109/TSE.2016.2584059","bibtex":"@article{Dou_TSE17,\n\tauthor = {Wensheng Dou, Chang Xu, Shing-Chi Cheung and Jun Wei},\n\ttitle = {CACheck: Detecting and Repairing Cell Arrays in Spreadsheets},\n\tjournal = {IEEE Transactions on Software Engineering (TSE)},\n\tyear = {2017}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"GreenDroid: Automated Diagnosis of Energy Inefficiency for Smartphone Applications","date":"2014","authors":["Yepang Liu","Chang Xu","Shing-Chi Cheung","Jian Lu"],"venue":"IEEE Transactions on Software Engineering 40(9), September 2014","venueShort":"TSE","tags":[],"abstract":"\n Smartphone applications’ energy efficiency is vital, but many Android applications suffer from serious energy inefficiency problems. Locating these problems is labor-intensive and automated diagnosis is highly desirable. However, a key challenge is the lack of a decidable criterion that facilitates automated judgment of such energy problems. Our work aims to address this challenge. 
We conducted an in-depth study of 173 open-source and 229 commercial Android applications, and observed two common causes of energy problems: missing deactivation of sensors or wake locks, and cost-ineffective use of sensory data. With these findings, we\npropose an automated approach to diagnosing energy problems in Android applications. Our approach explores an application’s state space by systematically executing the application using Java PathFinder (JPF). It monitors sensor and wake lock operations to detect missing deactivation of sensors and wake locks. It also tracks the transformation and usage of sensory data and judges whether they are effectively utilized by the application using our state-sensitive data utilization metric. In this way, our approach can generate detailed reports with actionable information to assist developers in validating detected energy problems. We built our approach as a tool, GreenDroid, on top of JPF. Technically, we addressed the challenges of generating user interaction events and scheduling event han- dlers in extending JPF for analyzing Android applications. We evaluated GreenDroid using 13 real-world popular Android applications. GreenDroid completed energy efficiency diagnosis for these applications in a few minutes. 
It successfully located real energy problems in these applications, and additionally found new unreported energy problems that were later confirmed by developers.\n ","paperUrl":"http://sccpu2.cse.ust.hk/andrewust/files/TSE2014.pdf","projectUrl":"http://sccpu2.cse.ust.hk/greendroid/","bibtex":"@ARTICLE{Liu:TSE2014, \n author = {Liu, Yepang and Xu, Chang and Cheung, Shing-Chi and Lu, Jian}, \n journal = {IEEE Transactions on Software Engineering}, \n title = {GreenDroid: Automated Diagnosis of Energy Inefficiency for Smartphone Applications}, \n year = {2014}, \n volume = {40}, \n number = {9}, \n pages = {911-940}, \n doi = {10.1109/TSE.2014.2323982}, \n month = {Sept},\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"CHECKERDROID: Automated Quality Assurance for Smartphone Applications","date":"2014","authors":["Yepang Liu","Chang Xu","Shing-Chi Cheung","Wenhua Yang"],"venue":"International Journal of Software and Informatics (IJSI)","venueShort":"IJSI","tags":[],"abstract":"\n Smartphone applications’ quality is vital. However, many smartphone applications on market suffer from various bugs. One major reason is that developers lack viable techniques to help expose potential bugs in their applications. This paper presents a practical dynamic analysis tool, CheckerDroid, to help developers automatically detect both functional and non-functional bugs in their Android applications. CheckerDroid currently supports the detection of the following three types of bugs: null pointer exception, resource leak and sensor listener misusage. We built CheckerDroid by extending Java PathFinder (JPF), a widely-used model checker for general Java programs. Our extension addresses two technical challenges. First, Android applications are event-driven and lack explicit control flow information between event handlers. Second, Android applications closely hinge on native framework libraries, whose implementations are platform-dependent. 
To address these challenges, we derive event handler scheduling policies from Android documentations, and encode them to guide CheckerDroid to realistically execute Android applications. Besides, we modeled the side effects for a critical set of Android APIs such that CheckerDroid can conduct bug detection precisely. To evaluate CheckerDroid, we conducted experiments with seven popular real-world Android applications. CheckerDroid analyzed these applications in a few minutes, and successfully located real bugs in them.\n ","paperUrl":"http://sccpu2.cse.ust.hk/andrewust/files/IJSI2014.pdf","bibtex":"@article{DBLP:journals/ijsi/LiuXCY14,\n author = {Yepang Liu and\n Chang Xu and\n S. C. Cheung and\n Wenhua Yang},\n title = {{CHECKERDROID} : Automated Quality Assurance for Smartphone Applications},\n journal = {Int. J. Software and Informatics},\n volume = {8},\n number = {1},\n pages = {21--41},\n year = {2014},\n url = {http://www.ijsi.org/ch/reader/view_abstract.aspx?file_no=i181},\n timestamp = {Sun, 14 Aug 2016 14:06:59 +0200},\n biburl = {http://dblp.uni-trier.de/rec/bib/journals/ijsi/LiuXCY14},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Characterizing and Detecting Performance Bugs for Smartphone Applications","date":"2014","authors":["Yepang Liu","Chang Xu","Shing-Chi Cheung"],"venue":"36th International Conference on Software Engineering (ICSE 2014), Hyderabad, India, May-Jun 2014","venueShort":"ICSE","tags":["Android","Empirical Study"],"awards":["Distinguished Paper"],"abstract":"\n Smartphone applications’ performance has a vital impact on user experience. However, many smartphone applications suffer from bugs that cause significant performance degradation, thereby losing their competitive edge. Unfortunately, people have little understanding of these performance bugs. They also lack effective techniques to fight with such bugs. 
To bridge this gap, we conducted a study of 70 real-world performance bugs collected from eight large-scale and popular Android applications. We studied the characteristics (e.g., bug types and how they manifested) of these bugs and identified their common patterns. These findings can support follow-up research on performance bug avoidance, testing, debugging and analysis for smartphone applications. To demonstrate the usefulness of our findings, we implemented a static code analyzer, PerfChecker, to detect our identified performance bug patterns. We experimentally evaluated PerfChecker by applying it to 29 popular Android applications, which comprise 1.1 million lines of Java code. PerfChecker successfully detected 126 matching instances of our performance bug patterns. Among them, 68 were quickly confirmed by developers as previously unknown issues that affect application performance, and 20 were fixed soon afterwards by following our optimization suggestions.\n ","paperUrl":"http://sccpu2.cse.ust.hk/andrewust/files/ICSE2014.pdf","projectUrl":"http://sccpu2.cse.ust.hk/perfchecker","bibtex":"@inproceedings{DBLP:conf/icse/LiuXC14,\n author = {Yepang Liu and\n Chang Xu and\n Shing{-}Chi Cheung},\n title = {Characterizing and detecting performance bugs for smartphone applications},\n booktitle = {36th International Conference on Software Engineering, {ICSE} '14,\n Hyderabad, India - May 31 - June 07, 2014},\n pages = {1013--1024},\n year = {2014},\n crossref = {DBLP:conf/icse/2014},\n url = {http://doi.acm.org/10.1145/2568225.2568229},\n doi = {10.1145/2568225.2568229},\n timestamp = {Sun, 18 May 2014 16:12:57 +0200},\n biburl = {http://dblp.uni-trier.de/rec/bib/conf/icse/LiuXC14},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"slidesUrl":null},{"title":"Diagnosing Energy Efficiency and Performance for Mobile Internetware Applications: Challenges and Opportunities","date":"2015","authors":["Yepang Liu","Chang Xu","Shing-Chi 
Cheung"],"venue":"IEEE Software 32(1), Jan/Feb 2015","venueShort":"IEEE SOFTWARE","tags":[],"abstract":"\n Many smartphone applications' smart services are realized in a way that wastes energy or degrades performance, seriously affecting the user experience. What's worse, developers lack powerful tools to combat such problems, curbing the growth of Internet-based mobile computing. Research communities and industries have issued a strong call for effective techniques to diagnose energy and performance bugs in smartphone applications. This article describes bug characteristics, discusses diagnostic challenges, and reviews state-of-the-art diagnostic techniques. A case study shows how a representative tool analyzed commercial Android applications and the Samsung Mobile Software Developer's Kit, providing useful diagnostic information.\n ","paperUrl":"http://sccpu2.cse.ust.hk/andrewust/files/ieeesoft15.pdf","bibtex":"@article{DBLP:journals/software/LiuXC15,\n author = {Yepang Liu and\n Chang Xu and\n Shing{-}Chi Cheung},\n title = {Diagnosing Energy Efficiency and Performance for Mobile Internetware\n Applications},\n journal = {{IEEE} Software},\n volume = {32},\n number = {1},\n pages = {67--75},\n year = {2015},\n url = {http://dx.doi.org/10.1109/MS.2015.4},\n doi = {10.1109/MS.2015.4},\n timestamp = {Tue, 12 Jan 2016 12:01:52 +0100},\n biburl = {http://dblp.uni-trier.de/rec/bib/journals/software/LiuXC15},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Understanding and Detecting Wake Lock Misuses for Android Applications","date":"2016","authors":["Yepang Liu","Chang Xu","Shing-Chi Cheung","Valerio Terragni"],"venue":"24th ACM SIGSOFT International Symposium on the Foundations of Software Engineering (FSE 2016), Seattle, WA, USA, Nov 2016","venueShort":"FSE ","tags":[],"abstract":"\n Wake locks are widely used in Android apps to protect critical computations from being 
disrupted by device sleeping. Inappropriate use of wake locks often seriously impacts user experience. However, little is known on how wake locks are used in real-world Android apps and the impact of their misuses. To bridge the gap, we conducted a large-scale empirical study on 44,736 commercial and 31 open-source Android apps. By automated program analysis and manual investigation, we observed (1) common program points where wake locks are acquired and released, (2) 13 types of critical computational tasks that are often protected by wake locks, and (3) eight patterns of wake lock misuses that commonly cause functional and non-functional issues, only three of which had been studied by existing work. Based on our findings, we designed a static analysis technique, Elite, to detect two most common patterns of wake lock misuses. Our experiments on real-world subjects showed that Elite is effective and can outperform two state-of-the-art techniques.\n ","paperUrl":"http://sccpu2.cse.ust.hk/andrewust/files/FSE2016.pdf","slidesUrl":"http://sccpu2.cse.ust.hk/castle/materials/ELITE-FSE2016-V3.pdf","bibtex":"@inproceedings{Liu_FSE16,\n\tauthor = {Yepang Liu and Chang Xu and\n\t\t \t Shing{-}Chi Cheung and Valerio Terragni},\n\ttitle = {Understanding and Detecting Wake Lock Misuses for Android Applications},\n\tbooktitle = {Proceedings of the 2016 International Symposium on the Foundations of Software Engineering, {FSE} 2016},\n\tyear = {2016}\n}","arxivUrl":null,"projectUrl":null,"awards":[]},{"title":"DroidLeaks: a comprehensive database of resource leaks in Android apps","date":"2019","authors":["Yepang Liu","Jue Wang","Lili Wei","Chang Xu","Shing-Chi Cheung","Tianyong Wu","Jun Yan","Jian Zhang"],"venue":"Empirical Software Engineering 2019","venueShort":"EmSE","tags":[],"abstract":"\n Resource leaks in Android apps are pervasive. They can cause serious performance degradation and system crashes. 
In recent years, many resource leak detection techniques have been proposed to help Android developers correctly manage system resources. Yet, there exist no common databases of real-world bugs for effectively comparing such techniques to understand their strengths and limitations. This paper describes our effort towards constructing such a bug database named DROIDLEAKS. To extract real resource leak bugs, we mined 124,215 code revisions of 34 popular open-source Android apps. After automated filtering and manual validation, we successfully found 292 fixed resource leak bugs, which cover a diverse set of resource classes, from 32 analyzed apps. To understand these bugs, we conducted an empirical study, which revealed the characteristics of resource leaks in Android apps and common patterns of resource management mistakes made by developers. To further demonstrate the usefulness of our work, we evaluated eight resource leak detectors from both academia and industry on DROIDLEAKS and performed a detailed analysis of their performance. 
We release DROIDLEAKS for public access to support future research.\n ","paperUrl":"https://link.springer.com/article/10.1007/s10664-019-09715-8","projectUrl":"https://zenodo.org/record/2589909#.XfxlvZP7TOR","bibtex":"@article{DBLP:journals/ese/LiuWWXCWYZ19,\n author = {Yepang Liu and\n Jue Wang and\n Lili Wei and\n Chang Xu and\n Shing{-}Chi Cheung and\n Tianyong Wu and\n Jun Yan and\n Jian Zhang},\n title = {DroidLeaks: a comprehensive database of resource leaks in Android\n apps},\n journal = {Empirical Software Engineering},\n volume = {24},\n number = {6},\n pages = {3435--3483},\n year = {2019},\n url = {https://doi.org/10.1007/s10664-019-09715-8},\n doi = {10.1007/s10664-019-09715-8},\n timestamp = {Thu, 19 Dec 2019 09:26:48 +0100},\n biburl = {https://dblp.org/rec/bib/journals/ese/LiuWWXCWYZ19},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Automatic Software Refactoring via Weighted Clustering in Method-level Networks","date":"2018","authors":["Ying Wang","Hai Yu","Zhiliang Zhu","Wei Zhang","Yuli Zhao"],"venue":"IEEE Transactions on Software Engineering (TSE)","venueShort":"TSE","tags":[],"abstract":"\n In this study, we describe a system-level multiple refactoring algorithm, which can identify the move method, move field, and extract class refactoring opportunities automatically according to the principle of “high cohesion and low coupling.” The algorithm works by merging and splitting related classes to obtain the optimal functionality distribution from the system-level. Furthermore, we present a weighted clustering algorithm for regrouping the entities in a system based on merged method-level networks. Using a series of preprocessing steps and preconditions, the “bad smells” introduced by cohesion and coupling problems can be removed from both the non-inheritance and inheritance hierarchies without changing the code behaviors. 
We rank the refactoring suggestions based on the anticipated benefits that they bring to the system. Based on comparisons with related research and assessing the refactoring results using quality metrics and empirical evaluation, we show that the proposed approach performs well in different systems and is beneficial from the perspective of the original developers. Finally, an open source tool is implemented to support the proposed approach.\n ","paperUrl":"materials/TSE18-ying.pdf","projectUrl":"https://github.com/wangying8052/REsolution_runnable-JAR-File","bibtex":"@article{wang2018automatic,\n title={Automatic Software Refactoring via Weighted Clustering in Method-Level Networks},\n author={Ying, Wang and Hai, Yu and Zhiliang, Zhu and Wei, Zhang and Yuli, Zhao},\n journal={IEEE Transactions on Software Engineering},\n volume={44},\n number={3},\n pages={202--236},\n year={2018},\n publisher={IEEE}\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Risk Analysis on Multi-granular Network for Software Integration Testing","date":"2018","authors":["Ying Wang","Zhiliang Zhu","Hai Yu"],"venue":"IEEE Transactions on Circuits and Systems II: Express Briefs (TCAS2)","venueShort":"TCAS2","tags":[],"abstract":"\n This brief presents a model, a methodology, and an application scheme of risk assessment for information exchange system. The multi-granular flow network (MGFN) model serves as a basis for measuring the vulnerabilities and threats of components, and the failure consequences they bring to the system when a failure occurs. The risk factors of components are then quantified, assisted by a probabilistic risk analysis model. Furthermore, we apply the MGFN model and the risk assessment scheme in ordering class integration testing for object-oriented software system. 
By comparing our approach with the state-of-the-art integration test order algorithms from the perspectives of detection efficiency of severe faults and stubbing efforts, we show that classes with higher risk indexes can be tested in earlier integration steps, and that the total complexity of the established test stubs is minimized.\n ","paperUrl":"materials/TCAS218-ying.pdf","bibtex":"@article{wang2018risk,\n title={Risk Analysis on Multi-Granular Flow Network for Software Integration Testing},\n author={Ying, Wang and Zhiliang, Zhu and Hai, Yu and Bo, Yang},\n journal={IEEE Transactions on Circuits and Systems II: Express Briefs},\n volume={65},\n number={8},\n pages={1059--1063},\n year={2018},\n publisher={IEEE}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Do the Dependency Conflicts in My Project Matter?","date":"2018","authors":["Ying Wang","Ming Wen","Zhenwei Liu","Rongxin Wu","Rui Wang","Bo Yang","Hai Yu","Zhiliang Zhu","Shing-Chi Cheung"],"venue":" The ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering, Technical Research Paper, Lake Buena Vista, Florida, 4 Nov - 9 Nov 2018","venueShort":"ESEC/FSE","tags":[],"abstract":"\n Intensive dependencies of a Java project on third-party libraries can easily lead to the presence of multiple library or class versions on its classpath. When this happens, JVM will load one version and shadows the others. Dependency conflict (DC) issues occur when the loaded version fails to cover a required feature (e.g., method) referenced by the project, thus causing runtime exceptions. However, the warnings of duplicate classes or libraries detected by existing build tools such as Maven can be benign since not all instances of duplication will induce runtime exceptions, and hence are often ignored by developers. In this paper, we conducted an empirical study on real-world DC issues collected from large open source projects. 
We studied the manifestation and fixing patterns of DC issues. Based on our findings, we designed Decca, an automated detection tool that assesses DC issues' severity and filters out the benign ones. Our evaluation results on 30 projects show that Decca achieves a precision of 0.923 and recall of 0.766 in detecting high-severity DC issues. Decca also detected new DC issues in these projects. Subsequently, 20 DC bug reports were filed, and 11 of them were confirmed by developers. Issues in 6 reports were fixed with our suggested patches.\n ","paperUrl":"materials/fse18-ying.pdf","projectUrl":"https://deccadc.github.io/fse18/","slidesUrl":"materials/fse18-ying-slides.pdf","bibtex":"@inproceedings{wang2018conflict,\n title={Do the Dependency Conflicts in My Project Matter?},\n author={Wang, Ying and Wen, Ming and Liu, Zhenwei and Wu, Rongxin and Wang, Rui and Yang, Bo and Yu, Hai and Zhu, Zhiliang and Cheung, Shing-Chi},\n booktitle={Proceedings of the 2018 26th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE 2018)},\n pages={1--12},\n year={2018},\n organization={ACM}\n}","arxivUrl":null,"awards":[]},{"title":"Using Risk Analysis to Prioritize Test Cases","date":"2018","authors":["Ying Wang","Hai Yu","Zhiliang Zhu"],"venue":"Journal of Systems and Software (JSS)","venueShort":"JSS","tags":[],"abstract":"\n In this paper, we present a risk-based test case prioritization (Ri-TCP) algorithm based on the transmission of information flows among software components. Most of the existing approaches rely on the historical code changes or test case execution data, few of them effectively use the system topology information covered by test cases when scheduling the execution of test cases. From the perspective of code structure, the proposed algorithm firstly maps software into an information flow-based directed network model. 
Then, functional paths covered by each test case are represented by a set of barbell motifs. Finally, combining with probabilistic risk analysis (PRA) and fault tree model, we assign a priority to each test case by calculating the sum of risk indexes of all the barbells covered by it. Experimental results demonstrate that Ri-TCP technique has a higher detection rate of faults with serious risk indicators and performs stably in different systems, compared with the other state-of-the-art algorithms.\n ","paperUrl":"materials/JSS18-ying.pdf","bibtex":"@article{wang2018using,\n title={Using reliability risk analysis to prioritize test cases},\n author={Ying, Wang and Zhiliang, Zhu and Bo,Yang and Fangda, Guo and Hai,Yu},\n journal={Journal of Systems and Software},\n volume={139},\n pages={14--31},\n year={2018},\n publisher={Elsevier}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Could I Have a Stack Trace to Examine the Dependency Conflict Issue?","date":"2019","authors":["Ying Wang","Ming Wen","Rongxin Wu","Zhenwei Liu","Shin Hwei Tan","Zhiliang Zhu","Hai Yu","Shing-Chi Cheung"],"venue":"International Conference on Software Engineering 2019, Technical Research Paper, Montréal, QC, Canada, 25 May - 31 May","venueShort":"ICSE","tags":[],"abstract":"\n Intensive use of libraries in Java projects brings potential risk of dependency conflicts, which occur when a project directly or indirectly depends on multiple versions of the same library or class. When this happens, JVM loads one version and shadows the others. Runtime exceptions can occur when methods in the shadowed versions are referenced. Although project management tools such as Maven are able to give warnings of potential dependency conflicts when a project is built, developers often ask for crashing stack traces before examining these warnings. 
It motivates us to develop RIDDLE, an automated approach that generates tests and collects crashing stack traces for projects subject to risk of dependency conflicts. RIDDLE, built on top of ASM and EVOSUITE, combines condition mutation, search strategies and condition restoration. We applied RIDDLE on 19 real-world Java projects with duplicate libraries or classes. We reported 20 identified dependency conflicts including their induced crashing stack traces and the details of generated tests. Among them, 15 conflicts were confirmed by developers as real issues, and 10 were readily fixed. The evaluation results demonstrate the effectiveness and usefulness of RIDDLE.\n ","paperUrl":"materials/ICSE19-ying.pdf","projectUrl":"https://skillwind.github.io/RiddleDC/index.html","bibtex":"@inproceedings {WANG2019STACK,\n title = {{Could I Have a Stack Trace to Examine the Dependency Conflict Issue?}},\n author = {Ying, Wang and Ming, Wen and Rongxin, Wu and Zhenwei, Liu and Shin Hwei, Tan and Zhiliang, Zhu and Hai, Yu and Shing-Chi, Cheung},\n booktitle = {{Proceedings of the 41st International Conference on Software Engineering}},\n series = {ICSE 2019},\n year = {2019},\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Scaling Up Symbolic Analysis by Removing Z-Equivalent States","date":"2014","authors":["Yueqi Li","Shing-Chi Cheung","Xiangyu Zhang","Yepang Liu"],"venue":"ACM Transactions on Software Engineering and Methodology 23(4), August 2014","venueShort":"TOSEM","tags":[],"abstract":"\n Path explosion is a major issue in applying path-sensitive symbolic analysis to large programs. We observe that many symbolic states generated by the symbolic analysis of a procedure are indistinguishable to its callers. It is, therefore, possible to keep only one state from each set of equivalent symbolic states without affecting the analysis result. 
Based on this observation, we propose an equivalence relation called z-equivalence, which is weaker than logical equivalence, to relate a large number of z-equivalent states. We prove that z-equivalence is strong enough to guarantee that paths to be traversed by the symbolic analysis of two z-equivalent states are identical, giving the same solutions to satisfiability and validity queries. We propose a sound linear algorithm to detect z-equivalence. Our experiments show that the symbolic analysis that leverages z-equivalence is able to achieve more than ten orders of magnitude reduction in terms of search space. The reduction significantly alleviates the path explosion problem, enabling us to apply symbolic analysis in large programs such as Hadoop and Linux Kernel.\n ","paperUrl":"http://sccpu2.cse.ust.hk/andrewust/files/tosem14.pdf","bibtex":"@article{Li:TOSEM2014,\n author = {Li, Yueqi and Cheung, Shing-Chi and Zhang, Xiangyu and Liu, Yepang},\n title = {Scaling Up Symbolic Analysis by Removing Z-Equivalent States},\n journal = {ACM Trans. Softw. Eng. Methodol.},\n issue_date = {August 2014},\n volume = {23},\n number = {4},\n month = sep,\n year = {2014},\n pages = {34:1--34:32},\n articleno = {34},\n url = {http://doi.acm.org/10.1145/2652484},\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Symbolic State Validation through Runtime Data","date":"2014","authors":["Yueqi Li","Shing-Chi Cheung"],"venue":"29th IEEE/ACM International Conference on Automated Software Engineering (ASE 2014), Vasteras, Sweden, September 2014","venueShort":"ASE","tags":[],"abstract":"\n Real world programs are typically built on top of many library functions. Symbolic analysis of these programs generally requires precise models of these functions' Application Programming Interfaces (APIs), which are mostly unavailable because these models are costly to construct. 
A variant approach of symbolic analysis is to over-approximate the return values of those APIs that have not been modeled. However, such approximation can induce many unreachable symbolic states, which are expensive to validate manually. In this paper, we propose a static approach to automatically validating the reported anomalous symbolic states. The validation makes use of the available runtime data of the un-modeled APIs collected from previous program executions. We show that the symbolic state validation problem can be cast as a MAX-SAT problem and solved by existing constraint solvers.\n\nOur approach is motivated by two observations. We may bind the symbolic parameters in un-modeled APIs based on observations made in former executions by other programs. The binding enables us to use the corresponding observed concrete return values of APIs to validate the symbolic states arising from the over-approximated return values of the un-modeled APIs. Second, some symbolic constraints can be accurately evaluated despite the imprecision of the over-approximated symbolic values.\n\nOur technique found 80 unreported bugs when it was applied to 10 popular programs with a total of 1.5 million lines of code. All of them can be confirmed by test cases. Our technique presents a promising way to apply the big data paradigm to software engineering. 
It provides a mechanism to validate the symbolic states of a project by leveraging the many concrete input-output values of APIs collected from other projects.\n ","paperUrl":"http://dl.acm.org/citation.cfm?doid=2642937.2642973","bibtex":"@inproceedings{DBLP:conf/kbse/LiC14,\n author = {Yueqi Li and\n Shing{-}Chi Cheung},\n title = {Symbolic state validation through runtime data},\n booktitle = {{ACM/IEEE} International Conference on Automated Software Engineering,\n {ASE} '14, Vasteras, Sweden - September 15 - 19, 2014},\n pages = {187--198},\n year = {2014},\n crossref = {DBLP:conf/kbse/2014},\n url = {http://doi.acm.org/10.1145/2642937.2642973},\n doi = {10.1145/2642937.2642973},\n timestamp = {Fri, 07 Nov 2014 12:44:47 +0100},\n biburl = {http://dblp.uni-trier.de/rec/bib/conf/kbse/LiC14},\n bibsource = {dblp computer science bibliography, http://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"An Empirical Study on TensorFlow Program Bugs","date":"2018","authors":["Yuhao Zhang","Yifan Chen","Shing-Chi Cheung","Yingfei Xiong","Lu Zhang"],"venue":"International Symposium on Software Testing and Analysis, Amsterdam Netherlands, July 2018","venueShort":"ISSTA","tags":[],"abstract":"\n Deep learning applications become increasingly popular in important domains such as self-driving systems and facial identity systems. Defective deep learning applications may lead to catastrophic consequences. Although recent research efforts were made on testing and debugging deep learning applications, the characteristics of deep learning defects have never been studied. To fill this gap, we studied deep learning applications built on top of TensorFlow and collected program bugs related to TensorFlow from StackOverflow QA pages and Github projects. We extracted information from QA pages, commit messages, pull request messages, and issue discussions to examine the root causes and symptoms of these bugs. 
We also studied the strategies deployed by TensorFlow users for bug detection and localization. These findings help researchers and TensorFlow users to gain a better understanding of coding defects in TensorFlow programs and point out a new direction for future research.\n ","paperUrl":"materials/issta18main-p98-p.pdf","bibtex":"@inproceedings {ISSTA18,\n title = {{An Empirical Study on TensorFlow Program Bugs}},\n author = {Yuhao Zhang, Yifan Chen, Shing-Chi Cheung, Yingfei Xiong, Lu Zhang},\n booktitle = {{Proceedings of The ACM SIGSOFT International Symposium on Software Testing and Analysis}},\n year = {2018},\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"ReScue: Crafting Regular Expression DoS Attacks","date":"2018","authors":["Yuju Shen","Yanyan Jiang","Chang Xu","Ping Yu","Xiaoxing Ma","Jian Lu"],"venue":"2018 33rd ACM/IEEE International Conference on Automated Software Engineering (ASE '18), September 2018, Montpellier, France","venueShort":"ASE","tags":[],"abstract":"\n Regular expression (regex) with modern extensions is one of the most popular string processing tools. However, poorly-designed regexes can yield exponentially many matching steps, and lead to regex Denial-of-Service (ReDoS) attacks under well-conceived string inputs. This paper presents ReScue, a three-phase gray-box analytical technique, to automatically generate ReDoS strings to highlight vulnerabilities of given regexes. ReScue systematically seeds (by a genetic search), incubates (by another genetic search), and finally pumps (by a regex-dedicated algorithm) for generating strings with maximized search time. We implemented the ReScue tool and evaluated it against 29,088 practical regexes in real-world projects. 
The evaluation results show that ReScue found 49% more attack strings compared with the best existing technique, and applying ReScue to popular GitHub projects discovered ten previously unknown ReDoS vulnerabilities.\n ","paperUrl":"https://cs.nju.edu.cn/changxu/1_publications/ASE18.pdf","projectUrl":"http://2bdenny.github.io/ReScue/","bibtex":"@inproceedings{shen_rescue_2018,\n author = {Yuju Shen and Yanyan Jiang and Chang Xu and Ping Yu and Xiaoxing Ma and Jian Lu},\n title = {ReScue: Crafting regular expression DoS attacks},\n pages = {to appear},\n year = {2018},\n booktitle = {Proceedings of the 33rd International Conference on Automated Software Engineering (ASE)},\n pdf = {/spar/publication/shen_rescue_2018.pdf},\n code = {http://2bdenny.github.io/ReScue/},\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Analyzing and Disentangling Interleaved Interrupt-Driven IoT Programs","date":"2019","authors":["Yuxia Sun","Song Guo","Shing-Chi Cheung","Yong Tang"],"venue":"IEEE Internet of Things Journal 2019","venueShort":"IoT-J","tags":[],"abstract":"\n In the Internet of Things (IoT) community, wireless sensor network (WSN) is a key technique to enable ubiquitous sensing of environments and provide reliable services to applications. WSN programs, typically interrupt-driven, implement the functionalities via the collaboration of interrupt procedure instances (IPIs, namely executions of interrupt processing logic). However, due to the complicated concurrency model of WSN programs, the IPIs are interleaved intricately and the program behaviors are hard to predicate from the source codes. Thus, to improve the software quality of WSN programs, it is significant to disentangle the interleaved executions and develop various IPI-based program analysis techniques, including offline and online ones. As the common foundation of those techniques, a generic efficient and real-time algorithm to identify IPIs is urgently desired. 
However, the existing instance-identification approach cannot satisfy the desires. In this paper, we first formally define the concept of IPI. Next, we propose a generic IPI-identification algorithm, and prove its correctness, real-time, and efficiency. We also conduct comparison experiments to illustrate that our algorithm is more efficient than the existing one in terms of both time and space. As the theoretical analyses and empirical studies exhibit, our algorithm provides the groundwork for IPI-based analyses of WSN programs in IoT environment.\n ","paperUrl":"https://ieeexplore.ieee.org/document/8648188","bibtex":"@article{DBLP:journals/iotj/SunGCT19,\n author = {Yuxia Sun and\n Song Guo and\n Shing{-}Chi Cheung and\n Yong Tang},\n title = {Analyzing and Disentangling Interleaved Interrupt-Driven IoT Programs},\n journal = {{IEEE} Internet of Things Journal},\n volume = {6},\n number = {3},\n pages = {5376--5386},\n year = {2019},\n url = {https://doi.org/10.1109/JIOT.2019.2900769},\n doi = {10.1109/JIOT.2019.2900769},\n timestamp = {Fri, 05 Jul 2019 09:39:40 +0200},\n biburl = {https://dblp.org/rec/bib/journals/iotj/SunGCT19},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Disclosing and Locating Concurrency Bugs of Interrupt-Driven IoT Programs","date":"2019","authors":["Yuxia Sun","Shing-Chi Cheung","Song Guo","Ming Cheng"],"venue":"IEEE Internet of Things Journal 2019","venueShort":"IoT-J","tags":[],"abstract":"\n The Internet of Things (IoT) is envisioned as a distributed network formed by many end devices, e.g., the motes of wireless sensor network (WSN). These important IoT end devices enable ubiquitous sensing of environments and provide reliable services for mission-critical applications. 
However, programs running on WSN devices are typically interrupt-driven and prone to interrupt-induced concurrency bugs, which are primarily caused by erroneous interleavings among interrupt procedure instances (IPIs) (namely, executions of interrupt processing logic). In this paper, we use a set of dynamic bug patterns to characterize the concurrency bugs due to buggy access-interleavings among IPIs to shared resources, including shared memory locations and shared communication channels. By matching the above bug patterns, a dynamic analysis approach called disclosing and locating concurrency bugs of interrupt-driven IoT programs based on dynamic bug patterns (Daemon) is proposed to automatically detect and locate concurrency bugs in WSN programs. A GUI tool of Daemon is developed. As the empirical studies exhibit, the tool can discover concurrency bugs effectively and locate the buggy source lines visually.\n ","paperUrl":"https://ieeexplore.ieee.org/document/8746139","bibtex":"@article{DBLP:journals/iotj/SunCGC19,\n author = {Yuxia Sun and\n Shing{-}Chi Cheung and\n Song Guo and\n Ming Cheng},\n title = {Disclosing and Locating Concurrency Bugs of Interrupt-Driven IoT Programs},\n journal = {{IEEE} Internet of Things Journal},\n volume = {6},\n number = {5},\n pages = {8945--8957},\n year = {2019},\n url = {https://doi.org/10.1109/JIOT.2019.2925291},\n doi = {10.1109/JIOT.2019.2925291},\n timestamp = {Thu, 07 Nov 2019 09:19:37 +0100},\n biburl = {https://dblp.org/rec/bib/journals/iotj/SunCGC19},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Fuzzing Deep Learning Compilers with HirGen","date":"2023","authors":["Haoyang Ma","Qingchao Shen","Yongqiang Tian","Junjie Chen","Shing-Chi Cheung"],"venue":"ACM SIGSOFT International Symposium on Software Testing and Analysis","venueShort":"ISSTA","tags":["Deep Learning compiler 
testing"],"projectUrl":"https://zenodo.org/record/7905120#.ZKptii9ByJ8","paperUrl":null,"arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"StubCoder: Automated Generation and Repair of Stub Code for Mock Objects","date":"2023","authors":["Hengcheng Zhu","Lili Wei","Valerio Terragni","Yepang Liu","Shing-Chi Cheung","Jiarong Wu","Qin Sheng","Bing Zhang","Lihong Song"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","tags":["Mocking","Unit Test"],"abstract":"Mocking is an essential unit testing technique for isolating the class under test (CUT) from its dependencies. Developers often leverage mocking frameworks to develop stub code that specifies the behaviors of mock objects. However, developing and maintaining stub code is labor-intensive and error-prone. In this paper, we present StubCoder to automatically generate and repair stub code for regression testing. StubCoder implements a novel evolutionary algorithm that synthesizes test-passing stub code guided by the runtime behavior of test cases. We evaluated our proposed approach on 59 test cases from 13 open-source projects. 
Our evaluation results show that StubCoder can effectively generate stub code for incomplete test cases without stub code and repair obsolete test cases with broken stub code.","paperUrl":"https://doi.org/10.1145/3617171","projectUrl":"https://github.com/henryhchchc","bibtex":"@article{10.1145/3617171,\n author = {Zhu, Hengcheng and Wei, Lili and Terragni, Valerio and Liu, Yepang and Cheung, Shing-Chi and Wu, Jiarong and Sheng, Qin and Zhang, Bing and Song, Lihong},\n title = {StubCoder: Automated Generation and Repair of Stub Code for Mock Objects},\n year = {2023},\n publisher = {Association for Computing Machinery},\n address = {New York, NY, USA},\n issn = {1049-331X},\n url = {https://doi.org/10.1145/3617171},\n doi = {10.1145/3617171},\n abstract = {Mocking is an essential unit testing technique for isolating the class under test (CUT) from its dependencies. Developers often leverage mocking frameworks to develop stub code that specifies the behaviors of mock objects. However, developing and maintaining stub code is labor-intensive and error-prone. In this paper, we present StubCoder to automatically generate and repair stub code for regression testing. StubCoder implements a novel evolutionary algorithm that synthesizes test-passing stub code guided by the runtime behavior of test cases. We evaluated our proposed approach on 59 test cases from 13 open-source projects. Our evaluation results show that StubCoder can effectively generate stub code for incomplete test cases without stub code and repair obsolete test cases with broken stub code.},\n note = {Just Accepted},\n journal = {ACM Trans. Softw. Eng. 
Methodol.},\n month = {aug},\n keywords = {Test Generation and Repair, Genetic Programming, Software Testing, Mocking, Evolutionary Computation, Program Analysis}\n }","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"MockSniffer: Characterizing and Recommending Mocking Decisions for Unit Tests","date":"2020","authors":["Hengcheng Zhu","Lili Wei","Ming Wen","Yepang Liu","Shing-Chi Cheung","Qin Sheng","Cui Zhou"],"venue":"IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","tags":["Mocking","Unit Test"],"abstract":"In unit testing, mocking is popularly used to ease test effort, reduce test flakiness, and increase test coverage by replacing the actual dependencies with simple implementations. However, there are no clear criteria to determine which dependencies in a unit test should be mocked. Inappropriate mocking can have undesirable consequences: under-mocking could result in the inability to isolate the class under test (CUT) from its dependencies while over-mocking increases the developers' burden on maintaining the mocked objects and may lead to spurious test failures. According to existing work, various factors can determine whether a dependency should be mocked. As a result, mocking decisions are often difficult to make in practice. Studies on the evolution of mocked objects also showed that developers tend to change their mocking decisions: 17% of the studied mocked objects were introduced sometime after the test scripts were created and another 13% of the originally mocked objects eventually became unmocked. In this work, we are motivated to develop an automated technique to make mocking recommendations to facilitate unit testing. We studied 10,846 test scripts in four actively maintained open-source projects that use mocked objects, aiming to characterize the dependencies that are mocked in unit testing. 
Based on our observations on mocking practices, we designed and implemented a tool, MockSniffer, to identify and recommend mocks for unit tests. The tool is fully automated and requires only the CUT and its dependencies as input. It leverages machine learning techniques to make mocking recommendations by holistically considering multiple factors that can affect developers' mocking decisions. Our evaluation of MockSniffer on ten open-source projects showed that it outperformed three baseline approaches, and achieved good performance in two potential application scenarios.","paperUrl":"https://doi.org/10.1145/3324884.3416539","projectUrl":"https://github.com/henryhchchc/MockSniffer","bibtex":"@inproceedings{10.1145/3324884.3416539,\n author = {Zhu, Hengcheng and Wei, Lili and Wen, Ming and Liu, Yepang and Cheung, Shing-Chi and Sheng, Qin and Zhou, Cui},\n title = {MockSniffer: Characterizing and Recommending Mocking Decisions for Unit Tests},\n year = {2020},\n isbn = {9781450367684},\n publisher = {Association for Computing Machinery},\n address = {New York, NY, USA},\n url = {https://doi.org/10.1145/3324884.3416539},\n doi = {10.1145/3324884.3416539},\n abstract = {In unit testing, mocking is popularly used to ease test effort, reduce test flakiness, and increase test coverage by replacing the actual dependencies with simple implementations. However, there are no clear criteria to determine which dependencies in a unit test should be mocked. Inappropriate mocking can have undesirable consequences: under-mocking could result in the inability to isolate the class under test (CUT) from its dependencies while over-mocking increases the developers' burden on maintaining the mocked objects and may lead to spurious test failures. According to existing work, various factors can determine whether a dependency should be mocked. As a result, mocking decisions are often difficult to make in practice. 
Studies on the evolution of mocked objects also showed that developers tend to change their mocking decisions: 17% of the studied mocked objects were introduced sometime after the test scripts were created and another 13% of the originally mocked objects eventually became unmocked. In this work, we are motivated to develop an automated technique to make mocking recommendations to facilitate unit testing. We studied 10,846 test scripts in four actively maintained open-source projects that use mocked objects, aiming to characterize the dependencies that are mocked in unit testing. Based on our observations on mocking practices, we designed and implemented a tool, MockSniffer, to identify and recommend mocks for unit tests. The tool is fully automated and requires only the CUT and its dependencies as input. It leverages machine learning techniques to make mocking recommendations by holistically considering multiple factors that can affect developers' mocking decisions. Our evaluation of MockSniffer on ten open-source projects showed that it outperformed three baseline approaches, and achieved good performance in two potential application scenarios.},\n booktitle = {Proceedings of the 35th IEEE/ACM International Conference on Automated Software Engineering},\n pages = {436–447},\n numpages = {12},\n keywords = {unit testing, dependencies, recommendation system, mocking},\n location = {Virtual Event, Australia},\n series = {ASE '20}\n }","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"How Do Python Framework APIs Evolve? An Exploratory Study","date":"2020","authors":["Zhaoxu Zhang","Hengcheng Zhu","Ming Wen","Yida Tao","Yepang Liu","Yingfei Xiong"],"venue":"International Conference on Software Analysis, Evolution and Reengineering","venueShort":"SANER","tags":["Python","API Evolution"],"abstract":"Python is a popular dynamic programming language. In recent years, many frameworks implemented in Python have been widely used for data science and web development. 
Similar to frameworks in other languages, the APIs provided by Python frameworks often evolve, which would inevitably induce compatibility issues in client applications. While existing work has studied the evolution of frameworks in static programming languages such as Java, little is known on how Python framework APIs evolve and the characteristics of the compatibility issues induced by such evolution. To bridge this gap, we take a first look at the evolution of Python framework APIs and the resulting compatibility issues in client applications. We analyzed 288 releases of six popular Python frameworks from three different domains and 5,538 open-source projects built on these frameworks. We investigated the evolution patterns of Python framework APIs and found that they largely differ from those of Java framework APIs. We also investigated the compatibility issues in client applications and identified common strategies that developers adopt to fix these issues. Based on the empirical findings, we designed and implemented a tool, PYCOMPAT , to automatically detect compatibility issues caused by misusing evolved framework APIs in Python applications. Experiments on 10 real-world projects show that our tool can effectively detect compatibility issues of developers' concern.","paperUrl":"https://doi.org/10.1109/SANER48275.2020.9054800","projectUrl":"https://github.com/sqlab-sustech/PyCompat","bibtex":"@INPROCEEDINGS{9054800,\n author={Zhang, Zhaoxu and Zhu, Hengcheng and Wen, Ming and Tao, Yida and Liu, Yepang and Xiong, Yingfei},\n booktitle={2020 IEEE 27th International Conference on Software Analysis, Evolution and Reengineering (SANER)},\n title={How Do Python Framework APIs Evolve? 
An Exploratory Study},\n year={2020},\n volume={},\n number={},\n pages={81-92},\n doi={10.1109/SANER48275.2020.9054800}\n }","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Characterizing and Detecting Configuration Compatibility Issues in Android Apps","date":"2021-08-26","authors":["Huaxun Huang","Ming Wen","Lili Wei","Yepang Liu","Shing-Chi Cheung"],"venue":"Proceedings of the 36th IEEE/ACM International Conference on Automated Software Engineering (ASE '21)","venueShort":"ASE","tags":["Android","Compatibility Issues","XML Configurations"],"abstract":"\nXML configuration files are widely used in Android to define an app's user interface and essential runtime information such as system permissions. As Android evolves, it might introduce functional changes in the configuration environment, thus causing compatibility issues that manifest as inconsistent app behaviors at different API levels. Such issues can often induce software crashes and inconsistent look-and-feel when running at specific Android versions. Existing works incur plenty of false positive and false negative issue-detection rules by conducting trivial data-flow analysis while failing to model the XML tree hierarchies of the Android configuration files. Besides, little is known about how the changes in an Android framework can induce such compatibility issues. To bridge such gaps, we conducted a systematic study by analyzing 196 real-world issues collected from 43 popular apps. We identified common patterns of Android framework code changes that induce such configuration compatibility issues. Based on the findings, we propose ConfDroid that can automatically extract rules for detecting configuration compatibility issues. The intuition is to perform symbolic execution based on a model learned from the common code change patterns. Experiment results show that ConfDroid can successfully extract 282 valid issue-detection rules with a precision of 91.9%. 
Among them, 65 extracted rules can manifest issues that cannot be detected by the rules of state-of-the-art baselines. More importantly, 11 out of them have led to the detection of 107 reproducible configuration compatibility issues that the baselines cannot detect in 30 out of 316 real-world Android apps.\n ","projectUrl":"https://sites.google.com/view/confdroid","paperUrl":"https://castlelab.github.io/selected-publications/assets/ConfDroid-ASE21.pdf","arxivUrl":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"FlashSchema: Achieving High Quality XML Schemas with Powerful Inference Algorithms and Large-scale Schema Data.","date":"2020","authors":["Yeting LI"," Jialun CAO"," Haiming CHEN"," Tingjian GE"," Zhiwu XU"," Qiancheng PENG"],"venue":"International Conference on Data Engineering","venueShort":"ICDE","tags":["XML Schemas","Schemas Inference"],"abstract":"Getting high quality XML schemas to avoid or reduce application risks is an important problem in practice, for which some important aspects have yet to be addressed satisfactorily in existing work. In this paper, we propose a tool FlashSchema for high quality XML schema design, which supports both one-pass and interactive schema design and schema recommendation. To the best of our knowledge, no other existing tools support interactive schema design and schema recommendation. One salient feature of our work is the design of algorithms to infer k-occurrence interleaving regular expressions, which are not only more powerful in model capacity, but also more efficient. Additionally, such algorithms form the basis of our interactive schema design. The other feature is that, starting from large-scale schema data that we have harvested from the Web, we devise a new solution for type inference, as well as propose schema recommendation for schema design. 
Finally, we conduct a series of experiments on two XML datasets, comparing with 9 state-of-the-art algorithms and open-source tools in terms of running time, preciseness, and conciseness. Experimental results show that our work achieves the highest level of preciseness and conciseness within only a few seconds. Experimental results and examples also demonstrate the effectiveness of our type inference and schema recommendation methods.","paperUrl":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9101818","arxivUrl":null,"bibtex":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"FlashRegex: Deducing Anti-ReDoS Regexes from Examples.","date":"2020","authors":["Yeting LI"," Zhiwu XU"," Jialun CAO"," Haiming CHEN"," Tingjian GE"," Shing-Chi CHEUNG","Haoren ZHAO"],"venue":"International Conference on Automated Software Engineering","venueShort":"ASE","tags":["regular expression","Anti-ReDoS","program synthesis","program repair"],"abstract":"Regular expressions (regexes) are widely used in different fields of computer science such as programming languages, string processing and databases. However, existing tools for synthesizing or repairing regexes were not designed to be resilient to Regex Denial of Service (ReDoS) attacks. Specifically, if a regex has super-linear (SL) worst-case complexity, an attacker could provide carefully-crafted inputs to launch ReDoS attacks. Therefore, in this paper, we propose a programming-by-example framework, FlashRegex, for generating anti-ReDoS regexes by either synthesizing or repairing from given examples. It is the first framework that integrates regex synthesis and repair with the awareness of ReDoS-vulnerabilities. We present novel algorithms to deduce anti-ReDoS regexes by reducing the ambiguity of these regexes and by using Boolean Satisfiability (SAT) or Neighborhood Search (NS) techniques. We evaluate FlashRegex with five related state-of-the-art tools. 
The evaluation results show that our work can effectively and efficiently generate anti-ReDoS regexes from given examples, and also reveal that existing synthesis and repair tools have neglected ReDoS-vulnerabilities of regexes. Specifically, the existing synthesis and repair tools generated up to 394 ReDoS-vulnerable regex within a few seconds to more than one hour, while FlashRegex generated no SL regex within around five seconds. Furthermore, the evaluation results on ReDoS-vulnerable regex repair also show that FlashRegex has better capability than existing repair tools and even human experts, achieving 4 more ReDoS-invulnerable regex after repair without trimming and resorting, highlighting the usefulness of FlashRegex in terms of the generality, automation and user-friendliness.","paperUrl":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9286092","arxivUrl":null,"bibtex":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"TransRegex: Multi-modal Regular Expression Synthesis by Generate-and-Repair.","date":"2021","authors":["Yeting LI"," Shuaimin LI"," Zhiwu XU"," Jialun CAO"," Zixuan CHEN"," Yun HU"," Haiming CHEN","Shing-Chi CHEUNG"],"venue":"International Conference on Software Engineering","venueShort":"ICSE","tags":["regular expression","regular expression synthesis","regex synthesis","regex repair","programming by example","programming by natural language"],"abstract":"Since regular expressions (abbrev. regexes) are difficult to understand and compose, automatically generating regexes has been an important research problem. This paper introduces TransRegex, for automatically constructing regexes from both natural language descriptions and examples. To the best of our knowledge, TransRegex is the first to treat the NLP-and-example-based regex synthesis problem as the problem of NLP-based synthesis with regex repair. For this purpose, we present novel algorithms for both NLP-based synthesis and regex repair. 
We evaluate TransRegex with ten relevant state-of-the-art tools on three publicly available datasets. The evaluation results demonstrate that the accuracy of our TransRegex is 17.4%, 35.8% and 38.9% higher than that of NLP-based approaches on the three datasets, respectively. Furthermore, TransRegex can achieve higher accuracy than the state-of-the-art multi-modal techniques with 10% to 30% higher accuracy on all three datasets. The evaluation results also indicate that TransRegex utilizes natural language and examples in a more effective way.","paperUrl":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9401951","arxivUrl":null,"bibtex":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"ReDoSHunter: A Combined Static and Dynamic Approach for Regular Expression DoS Detection.","date":"2021","authors":["Yeting Li"," Zixuan Chen"," Jialun Cao"," Zhiwu Xu"," Qiancheng Peng"," Haiming Chen"," Liyuan Chen"," Shing-Chi Cheung"],"venue":"USENIX Security Symposium","venueShort":"USENIX Security","tags":["regular expression","Anti-ReDoS","ReDoS detection"],"abstract":"Regular expression Denial of Service (ReDoS) is a class of algorithmic complexity attacks where there exist inputs causing the typical backtracking-based matching algorithms to run super-linear time. Considering the widespread use of regular expressions (regexes), ReDoS is a pervasive and serious threat. Thus, early detection of ReDoS-vulnerable regexes in software projects is vital. Existing detection approaches mainly fall into two categories: static and dynamic analysis. However, the static approaches detect more candidate vulnerabilities at the cost of low precision, while dynamic approaches guarantee the precision of detection yet compromise the recall. Detecting ReDoS at both high precision and high recall remains unsolved. Furthermore, we observed that a ReDoS-vulnerable regex often contains more than one vulnerability in practice. 
However, existing tools are incapable of detecting multiple vulnerabilities in one regex. To bridge the gaps, we propose ReDoSHunter, a ReDoS-vulnerable regex detection framework that can effectively pinpoint the multiple root causes of a vulnerable regex and generate the associated attack-triggering strings. Driven by our concluded five vulnerability patterns, ReDoSHunter can not only pinpoint the multiple vulnerabilities in one regex, but also assess the degree (i.e., exponential or polynomial) of vulnerabilities it detects. The experiment results show that ReDoSHunter is able to achieve 100% precision and 100% recall on three large-scale datasets with 37,651 regexes. Furthermore, apart from being able to detect 100% of the confirmed ReDoS CVEs (compared with 14.29%-60.00% achieved by existing works), ReDoSHunter also exposed 28 new ReDoS-vulnerabilities in intensively-tested projects, resulting in 26 assigned CVEs and 2 fixed by developers.","paperUrl":null,"arxivUrl":null,"bibtex":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Programming by Example Made Easy","date":"2023","authors":["Jiarong Wu","Lili Wei","Yanyan Jiang","Shing-Chi Cheung","Luyao Ren","Chang Xu"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","tags":["Programming Synthesis","Programming by Example"],"abstract":"Programming by example (PBE) is an emerging programming paradigm that automatically synthesizes programs specified by user-provided input-output examples. Despite the convenience for end-users, implementing PBE tools often requires strong expertise in programming language and synthesis algorithms. Such a level of knowledge is uncommon among software developers. It greatly limits the broad adoption of PBE by the industry. 
To facilitate the adoption of PBE techniques, we propose a PBE framework called Bee, which leverages an “entity-action” model based on relational tables to ease PBE development for a wide but restrained range of domains. Implementing PBE tools with Bee only requires adapting domain-specific data entities and user actions to tables, with no need to design a domain-specific language or an efficient synthesis algorithm. The synthesis algorithm of Bee exploits bidirectional searching and constraint-solving techniques to address the challenge of value computation nested in table transformation. We evaluated Bee’s effectiveness on 64 PBE tasks from three different domains and usability with a human study of 12 participants. Evaluation results show that Bee is easier to learn and use than the state-of-the-art PBE framework, and the bidirectional algorithm achieves comparable performance to domain-specifically optimized synthesizers.","paperUrl":"https://dl.acm.org/doi/10.1145/3607185","projectUrl":"https://github.com/Sissel-Wu/Bee","bibtex":"@article{10.1145/3607185,\nauthor = {Wu, Jiarong and Wei, Lili and Jiang, Yanyan and Cheung, Shing-Chi and Ren, Luyao and Xu, Chang},\ntitle = {Programming by Example Made Easy},\nyear = {2023},\nissue_date = {January 2024},\npublisher = {Association for Computing Machinery},\naddress = {New York, NY, USA},\nvolume = {33},\nnumber = {1},\nissn = {1049-331X},\nurl = {https://doi.org/10.1145/3607185},\ndoi = {10.1145/3607185},\nabstract = {Programming by example (PBE) is an emerging programming paradigm that automatically synthesizes programs specified by user-provided input-output examples. Despite the convenience for end-users, implementing PBE tools often requires strong expertise in programming language and synthesis algorithms. Such a level of knowledge is uncommon among software developers. It greatly limits the broad adoption of PBE by the industry. 
To facilitate the adoption of PBE techniques, we propose a PBE framework called Bee, which leverages an “entity-action” model based on relational tables to ease PBE development for a wide but restrained range of domains. Implementing PBE tools with Bee only requires adapting domain-specific data entities and user actions to tables, with no need to design a domain-specific language or an efficient synthesis algorithm. The synthesis algorithm of Bee exploits bidirectional searching and constraint-solving techniques to address the challenge of value computation nested in table transformation. We evaluated Bee’s effectiveness on 64 PBE tasks from three different domains and usability with a human study of 12 participants. Evaluation results show that Bee is easier to learn and use than the state-of-the-art PBE framework, and the bidirectional algorithm achieves comparable performance to domain-specifically optimized synthesizers.},\njournal = {ACM Trans. Softw. Eng. Methodol.},\nmonth = {nov},\narticleno = {4},\nnumpages = {36},\nkeywords = {programming by example, Program synthesis}\n}","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"Can Systems Explain Permissions Better? Understanding Users' Misperceptions under Smartphone Runtime Permission Model","date":"2021-08-11","authors":["Bingyu Shen","Lili Wei","Chengcheng Xiang","Yudong Wu","Mingyao Shen","Yuanyuan Zhou","Xinxin Jin"],"venue":"the 30th USENIX Security Symposium, Vancouver, BC, Canada, Aug 11-13 2021","venueShort":"USENIX Security","tags":["Android","User study","Security"],"abstract":"\n Current smartphone operating systems enable users to manage permissions according to their personal preferences with a runtime permission model. 
Nonetheless, the systems provide very limited information when requesting permissions, making it difficult for users to understand permissions’ capabilities and potentially induced risks.\n In this paper, we first investigated to what extent current system-provided information can help users understand the scope of permissions and their potential risks. We took a mixed-methods approach by collecting real permission settings from 4,636 Android users, an interview study of 20 participants, and large-scale Internet surveys of 1559 users. Our study identified several common misunderstandings on the runtime permission model among users. We found that only a very small percentage (6.1%) of users can infer the scope of permission groups accurately from the system-provided information. This indicates that the information provided by current systems is far from sufficient.\n We thereby explored what extra information that systems can provide to help users make more informed permission decisions. By surveying users’ common concerns on apps’ permission requests, we identified five types of information (i.e., decision factors) that are helpful for users’ decisions. We further studied the impact and helpfulness of the factors to users’ permission decisions with both positive and negative messages. Our study shows that the background access factor helps most while the grant rate helps the least. 
Based on the findings, we provide suggestions for system designers to enhance future systems with more permission information.\n ","projectUrl":"https://ucsdopera.github.io/PermissionStudyUsenix21/dataset/","paperUrl":"http://cseweb.ucsd.edu/~byshen/files/sec21-shen.pdf","arxivUrl":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"Logging Practices with Mobile Analytics: An Empirical Study on Firebase","date":"2021-05-17","authors":["Julian Harty","Haonan Zhang","Lili Wei","Luca Pascarella","Maurício Aniche","Weiyi Shang"],"venue":"the 8th IEEE/ACM International Conference on Mobile Software Engineering and Systems, Madrid, Spain, May 17-19 2021","venueShort":"MOBILESoft","tags":["Android","Empirical study"],"abstract":"\n Software logs are of great value in both industrial and open-source projects. Mobile analytics logging enables developers to collect logs remotely from their apps running on end user devices at the cost of recording and transmitting logs across the Internet to a centralised infrastructure.\n This paper makes a first step in characterising logging practices with a widely adopted mobile analytics logging library, namely Firebase Analytics. We provide an empirical evaluation of the use of Firebase Analytics in 57 open-source Android applications by studying the evolution of code-bases to understand: a) the needs-in-common that push practitioners to adopt logging practices on mobile devices, and b) the differences in the ways developers use local and remote logging.\n Our results indicate mobile analytics logs are less pervasive and less maintained than traditional logging code. 
Based on our analysis, we believe logging using mobile analytics is more user centered compared to traditional logging, where the latter is mainly used to record information for debugging purposes.\n ","paperUrl":"https://arxiv.org/abs/2104.02513","arxivUrl":null,"bibtex":null,"projectUrl":null,"slidesUrl":null,"awards":[]},{"title":"Characterizing Transaction-Reverting Statements in Ethereum Smart Contracts","date":"2021-11-15","authors":["Lu Liu","Lili Wei","Wuqi Zhang","Ming Wen","Yepang Liu","Shing-Chi Cheung"],"venue":"The 36th IEEE/ACM International Conference on Automated Software Engineering","venueShort":"ASE","tags":["Blockchain","Smart Contracts","Empirical Study"],"awards":[],"abstract":"\nSmart contracts are programs stored on blockchains to execute transactions. \nWhen input constraints or security properties are violated at runtime, the transaction being executed by a smart contract needs to be reverted to avoid undesirable consequences.\nOn Ethereum, the most popular blockchain that supports smart contracts, developers can choose among three transaction-reverting statements (i.e., require, if...revert, and if...throw) to handle anomalous transactions.\nWhile these transaction-reverting statements are vital for preventing smart contracts from exhibiting abnormal behaviors or suffering malicious attacks, there is limited understanding of how they are used in practice. \nIn this work, we perform the first empirical study to characterize transaction-reverting statements in Ethereum smart contracts. 
\nWe measured the prevalence of these statements in 3,866 verified smart contracts from popular dapps and built a taxonomy of their purposes via manually analyzing 557 transaction-reverting statements.\nWe also compared template contracts and their corresponding custom contracts to understand how developers customize the use of transaction-reverting statements.\nFinally, we analyzed the security impact of transaction-reverting statements by removing them from smart contracts and comparing the mutated contracts against the original ones. \nOur study led to important findings.\nFor example, we found that transaction-reverting statements are commonly used to perform seven types of authority verifications or validity checks, and missing such statements may compromise the security of smart contracts.\nWe also found that current smart contract security analyzers cannot effectively handle transaction-reverting statements when detecting security vulnerabilities.\nOur findings can shed light on further research in the broad area of smart contract quality assurance and provide practical guidance to smart contract developers on the appropriate use of transaction-reverting statements. 
\n ","projectUrl":"https://github.com/transaction-reverting-statements/Characterizing-require-statement-in-Ethereum-Smart-Contract","arxivUrl":"https://arxiv.org/abs/2108.10799","paperUrl":"https:/castlelab.github.io/selected-publications/assets/Characterizing_Transaction_Reverting_Statements-ASE21.pdf","slidesUrl":null,"bibtex":""},{"title":"Nyx: Detecting Exploitable Front-Running Vulnerabilities in Smart Contracts","date":"2024-05-20","authors":["Wuqi Zhang","Zhuo Zhang","Qingkai Shi","Lu Liu","Lili Wei","Yepang Liu","Xiangyu Zhang","Shing-Chi Cheung"],"venue":"The 45th IEEE Symposium on Security and Privacy","venueShort":"S&P","tags":["Blockchain","Front-running","Vulnerability","MEV","Program Analysis"],"abstract":"Smart contracts are susceptible to front-running attacks, in which malicious users leverage prior knowledge of upcoming transactions to execute attack transactions in advance and benefit their own portfolios. Existing contract analysis techniques raise a number of false positives and false negatives in that they simplistically treat data races in a contract as front-running vulnerabilities and can only analyze contracts in isolation. In this work, we formalize the definition of exploitable front-running vulnerabilities based on previous empirical studies on historical attacks, and present Nyx, a novel static analyzer to detect them. Nyx features a Datalog-based preprocessing procedure that efficiently and soundly prunes a large part of the search space, followed by a symbolic validation engine that precisely locates vulnerabilities with an SMT solver. We evaluate Nyx using a large dataset that comprises 513 real-world front-running attacks in smart contracts. Compared to six state-of-the-art techniques, Nyx surpasses them by 32.64%-90.19% in terms of recall and 2.89%-70.89% in terms of precision. 
Nyx has also identified four zero-days in real-world smart contracts.","projectUrl":null,"arxivUrl":null,"paperUrl":"https://castlelab.github.io/selected-publications/assets/Nyx-SP24.pdf","bibtex":null,"slidesUrl":null,"awards":[]},{"title":"Combatting Front-Running in Smart Contracts: Attack Mining, Benchmark Construction and Vulnerability Detector Evaluation","date":"2023-04-15","authors":["Wuqi Zhang","Lili Wei","Shing-Chi Cheung","Yepang Liu","Shuqing Li","Lu Liu","Michael R. Lyu"],"venue":"Transactions on Software Engineering","venueShort":"TSE","tags":["Blockchain","Front-running","Vulnerability","MEV","Benchmark"],"abstract":"\n Front-running attacks have been a major concern on the blockchain. Attackers launch front-running attacks by inserting additional transactions before upcoming victim transactions to manipulate victim transaction executions and make profits. Recent studies have shown that front-running attacks are prevalent on the Ethereum blockchain and have caused millions of US dollars loss. Vulnerable smart contracts, blockchain programs invoked by transactions, are held responsible for front-running attacks. Although techniques to detect front-running vulnerabilities have been proposed, their performance on real-world vulnerable contracts is unclear. There is no large-scale benchmark based on real attacks to evaluate their capabilities. This motivates us to build a benchmark consisting of 513 real-world attacks with vulnerable code labeled in 235 distinct smart contracts. We propose automated techniques to effectively collect real-world attacks and localize the corresponding vulnerable code at scale. Our experiments show that our approaches are effective, achieving higher recall in finding real attacks and higher precision in pinpointing vulnerabilities compared to the existing techniques. 
The evaluation of seven state-of-the-art vulnerability detection techniques on the benchmark reveals their inadequacy in detecting front-running vulnerabilities, with a low recall of at most 6.04%. Our further analysis identifies four common limitations in existing techniques: lack of support for inter-contract analysis, inefficient constraint solving for cryptographic operations, improper vulnerability patterns, and lack of token support.\n ","projectUrl":"https://github.com/Troublor/erebus-redgiant","arxivUrl":null,"paperUrl":"https://ieeexplore.ieee.org/document/10108045","bibtex":null,"slidesUrl":null,"awards":[]},{"title":"ÐArcher: Detecting On-Chain-Off-Chain Synchronization Bugs in Decentralized Applications","date":"2021-08-23","authors":["Wuqi Zhang","Lili Wei","Shuqing Li","Yepang Liu","Shing-Chi Cheung"],"venue":"Proceedings of the 29th ACM Joint European SoftwareEngineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE ’21)","venueShort":"ESEC/FSE","tags":["Decentralized Applications","Testing","Blockchain"],"abstract":"\n Since the emergence of Ethereum, blockchain-based decentralized applications (DApps) have become increasingly popular and important. To balance the security, performance, and costs, a DApp typically consists of two layers: an on-chain layer to execute transactions and store crucial data on blockchain, and an off-chain layer to interact with users. A DApp needs to proactively synchronize its off-chain layer with the on-chain layer, otherwise, the inconsistent data in the off-chain layer could mislead users and cause undesirable consequences, e.g., loss of transaction fees. However, transactions sent to blockchain are not guaranteed to be executed and could even be reversed after execution due to chain reorganization. 
Such non-determinism in the transaction execution is unique to blockchain and DApp developers may fail to perform the on-chain-off-chain synchronization accurately due to their unfamiliarity of the complex transaction lifecycle.\n In this work, we investigate the challenges of synchronizing on-chain and off-chain data in Ethereum-based DApps. We present two types of bugs that could result in inconsistencies between the on-chain and off-chain layers. To help detect such on-chain-off-chain synchronization bugs, we introduce a state transition model to guide the testing of DApps, and propose two effective oracles to facilitate the automatic identification of bugs. We build the first testing framework, ÐArcher, to detect on-chain-off-chain synchronization bugs in DApps. We have evaluated ÐArcher on 11 popular real-world DApps. ÐArcher achieves high precision (99.3%), recall (87.6%), and accuracy (89.4%) in bug detection and significantly outperforms the baseline methods. It has found 15 real bugs in the 11 DApps. So far, six of the 15 bugs have been confirmed by the developers and three have been fixed. These promising results demonstrate the usefulness of ÐArcher.\n ","projectUrl":"https://github.com/Troublor/darcher","arxivUrl":"https://arxiv.org/pdf/2106.09440.pdf","paperUrl":"https://castlelab.github.io/selected-publications/assets/DArcher-FSE21.pdf","bibtex":"@inproceedings{10.1145/3468264.3468546,\n author = {Zhang, Wuqi and Wei, Lili and Li, Shuqing and Liu, Yepang and Cheung, Shing-Chi},\n title = {DH{}Archer: Detecting on-Chain-off-Chain Synchronization Bugs in Decentralized Applications},\n year = {2021},\n isbn = {9781450385626},\n publisher = {Association for Computing Machinery},\n address = {New York, NY, USA},\n url = {https://doi.org/10.1145/3468264.3468546},\n doi = {10.1145/3468264.3468546},\n abstract = {Since the emergence of Ethereum, blockchain-based decentralized applications (DApps)\n have become increasingly popular and important. 
To balance the security, performance,\n and costs, a DApp typically consists of two layers: an on-chain layer to execute transactions\n and store crucial data on the blockchain and an off-chain layer to interact with users.\n A DApp needs to synchronize its off-chain layer with the on-chain layer proactively.\n Otherwise, the inconsistent data in the off-chain layer could mislead users and cause\n undesirable consequences, e.g., loss of transaction fees. However, transactions sent\n to the blockchain are not guaranteed to be executed and could even be reversed after\n execution due to chain reorganization. Such non-determinism in the transaction execution\n is unique to blockchain. DApp developers may fail to perform the on-chain-off-chain\n synchronization accurately due to their lack of familiarity with the complex transaction\n lifecycle. In this work, we investigate the challenges of synchronizing on-chain and\n off-chain data in Ethereum-based DApps. We present two types of bugs that could result\n in inconsistencies between the on-chain and off-chain layers. To help detect such\n on-chain-off-chain synchronization bugs, we introduce a state transition model to\n guide the testing of DApps and propose two effective oracles to facilitate the automatic\n identification of bugs. We build the first testing framework, DH{}Archer, to detect on-chain-off-chain\n synchronization bugs in DApps. We have evaluated DH{}Archer on 11 popular real-world\n DApps. DH{}Archer achieves high precision (99.3%), recall (87.6%), and accuracy (89.4%)\n in bug detection and significantly outperforms the baseline methods. It has found\n 15 real bugs in the 11 DApps. So far, six of the 15 bugs have been confirmed by the\n developers, and three have been fixed. 
These promising results demonstrate the usefulness\n of DH{}Archer.},\n booktitle = {Proceedings of the 29th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering},\n pages = {553–565},\n numpages = {13},\n keywords = {Software testing, DApps, Decentralized applications, Blockchain},\n location = {Athens, Greece},\n series = {ESEC/FSE 2021}\n }","slidesUrl":null,"awards":[]},{"title":"Will Dependency Conflicts Affect My Program's Semantics?","date":"2021","authors":["Ying Wang","Rongxin Wu","Chao Wang","Ming Wen","Yepang Liu","Shing-Chi Cheung","Hai Yu","Chang Xu","Zhiliang Zhu"],"venue":"IEEE Transactions on Software Engineering","venueShort":"TSE","tags":["Third-Party Libraries","Java","Dependency Management"],"abstract":"\n Java projects are often built on top of various third-party libraries. If multiple versions of a library exist on the classpath, JVM will only load one version and shadow the others, which we refer to as dependency conflicts. This would give rise to semantic conflict (SC) issues, if the library APIs referenced by a project have identical method signatures but inconsistent semantics across the loaded and shadowed versions of libraries. SC issues are difficult for developers to diagnose in practice, since understanding them typically requires domain knowledge. Although adapting the existing test generation technique for dependency conflict issues, Riddle, to detect SC issues is feasible, its effectiveness is greatly compromised. This is mainly because Riddle randomly generates test inputs, while the SC issues typically require specific arguments in the tests to be exposed. To address that, we conducted an empirical study of 316 real SC issues to understand the characteristics of such specific arguments in the test cases that can capture the SC issues. 
Inspired by our empirical findings, we propose an automated testing technique Sensor, which synthesizes test cases using ingredients from the project under test to trigger inconsistent behaviors of the APIs with the same signatures in conflicting library versions. Our evaluation results show that Sensor is effective and useful: it achieved a Precision of 0.898 and a Recall of 0.725 on open-source projects and a Precision of 0.821 on industrial projects; it detected 306 semantic conflict issues in 50 projects, 70.4% of which had been confirmed as real bugs, and 84.2% of the confirmed issues have been fixed quickly.\n ","projectUrl":"https://sensordc.github.io/","paperUrl":"https://ieeexplore.ieee.org/document/9350237","slidesUrl":null,"bibtex":"@article{YingSensor,\n author = {Ying Wang and\n Rongxin Wu and\n Chao Wang and\n Ming Wen and\n Yepang Liu and\n Shing{-}Chi Cheung and\n Hai Yu and\n Chang Xu\n and Zhiliang Zhu},\n title = {Will Dependency Conflicts Affect My Program's Semantics?},\n journal = {{IEEE} Transactions on Software Engineering},\n volume = {99},\n number = {1},\n pages = {1--22},\n year = {2021},\n url = {https://ieeexplore.ieee.org/document/9350237},\n doi = {10.1109/TSE.2021.3057767},\n timestamp = {Fri, 08 February 2021 21:56:08 +0200},\n biburl = {https://dblp.org/rec/journals/tsc/WangHXZC20.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n }","arxivUrl":null,"awards":[]},{"title":"Hero: On the Chaos When PATH Meets Modules","date":"2021","authors":["Ying Wang","Liang Qiao","Chang Xu","Yepang Liu","Shing-Chi Cheung","Na Meng","Hai Yu","Zhiliang Zhu"],"venue":"Proceedings of the 43rd International Conference on Software Engineering (ICSE ’21)","venueShort":"ICSE","tags":["Third-Party Libraries","Golang","Dependency Management"],"awards":["Distinguished Paper"],"abstract":"\n Ever since its first release in 2009, the Go programming language (Golang) has been well received by software communities. 
A major reason for its success is the powerful support of library-based development, where a Golang project can be conveniently built on top of other projects by referencing them as libraries. As Golang evolves, it recommends the use of a new library-referencing mode to overcome the limitations of the original one. While these two library modes are incompatible, both are supported by the Golang ecosystem. The heterogeneous use of library-referencing modes across Golang projects has caused numerous dependency management (DM) issues, incurring reference inconsistencies and even build failures. Motivated by the problem, we conducted an empirical study to characterize the DM issues, understand their root causes, and examine their fixing solutions. Based on our findings, we developed Hero, an automated technique to detect DM issues and suggest proper fixing solutions. We applied Hero to 19,000 popular Golang projects. The results showed that Hero achieved a high detection rate of 98.5% on a DM issue benchmark and found 2,422 new DM issues in 2,356 popular Golang projects. We reported 280 issues, among which 181 (64.6%) issues have been confirmed, and 160 of them (88.4%) have been fixed or are under fixing. 
Almost all the fixes have adopted our fixing suggestions.\n ","projectUrl":"http://www.hero-go.com/","paperUrl":"https://conf.researchr.org/details/icse-2021/icse-2021-papers/16/Hero-On-the-Chaos-When-PATH-Meets-Modules","slidesUrl":null,"bibtex":"@inproceedings{YingHero,\n author = {Ying Wang and\n Liang Qiao and\n Chang Xu and\n Yepang Liu and\n Shing{-}Chi Cheung and\n Na Meng and\n Hai Yu and \n Zhiliang Zhu},\n title = {Hero: On the Chaos When PATH Meets Modules},\n booktitle = {{ICSE} '21: 43rd International Conference on Software Engineering, Virtual\n Event, Spain, May 23-29, 2021},\n pages = {99--111},\n publisher = {{IEEE}},\n year = {2021},\n url = {https://ieeexplore.ieee.org/document/9401974},\n doi = {10.1109/ICSE43902.2021.00022},\n timestamp = {22-30 May 2021 10:58:23 +0100},\n biburl = {https://dblp.org/rec/conf/sigsoft/ZhangRC0C020.bib},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null},{"title":"Watchman: Monitoring Dependency Conflicts for Python Library Ecosystem","date":"2020","authors":["Ying Wang","Ming Wen","Yepang Liu","Yibo Wang","Zhenming Li","Chao Wang","Shing-Chi Cheung","Hai Yu","Chang Xu","Zhiliang Zhu"],"venue":"Proceedings of the 42nd International Conference on Software Engineering (ICSE ’20)","venueShort":"ICSE","tags":["Third-Party Libraries","Python","Dependency Management"],"abstract":"\n The PyPI ecosystem has indexed millions of Python libraries to allow developers to automatically download and install dependencies of their projects based on the specified version constraints. Despite the convenience brought by automation, version constraints in Python projects can easily conflict, resulting in build failures. We refer to such conflicts as Dependency Conflict (DC) issues. Although DC issues are common in Python projects, developers lack tool support to gain a comprehensive knowledge for diagnosing the root causes of these issues. 
In this paper, we conducted an empirical study on 235 real-world DC issues. We studied the manifestation patterns and fixing strategies of these issues and found several key factors that can lead to DC issues and their regressions. Based on our findings, we designed and implemented Watchman, a technique to continuously monitor dependency conflicts for the PyPI ecosystem. In our evaluation, Watchman analyzed PyPI snapshots between 11 Jul 2019 and 16 Aug 2019, and found 117 potential DC issues. We reported these issues to the developers of the corresponding projects. So far, 63 issues have been confirmed, 38 of which have been quickly fixed by applying our suggested patches.\n ","projectUrl":"http://www.watchman-pypi.com/","paperUrl":"https://dl.acm.org/doi/abs/10.1145/3377811.3380426","slidesUrl":"https://blog.acolyer.org/2020/09/21/watchman/","bibtex":"@inproceedings{YingWatchman,\n author = {Ying Wang and\n Ming Wen and\n Yepang Liu and\n Yibo Wang and\n Zhenming Li and\n Chao Wang and\n Shing{-}Chi Cheung and\n Hai Yu and\n Chang Xu and\n Zhiliang Zhu\n },\n title = {Watchman: Monitoring Dependency Conflicts for Python Library Ecosystem},\n booktitle = {{ICSE} '20: 42nd International Conference on Software Engineering, Virtual\n Event, Spain, July 6-11, 2020},\n pages = {125--135},\n publisher = {{ACM}},\n year = {2020},\n url = {https://dl.acm.org/doi/abs/10.1145/3377811.3380426},\n doi = {10.1145/3377811.3380426},\n timestamp = {Mon, 27 July 2020 16:42:27 +0200},\n biburl = {https://dblp.uni-trier.de/db/conf/icse/icse2020.html},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}","arxivUrl":null,"awards":[]},{"title":"To What Extent Do DNN-based Image Classification Models Make Unreliable Inferences?","date":"2021","authors":["Yongqiang Tian","Shiqing Ma","Ming Wen","Yepang Liu","Shing-Chi Cheung","Xiangyu Zhang"],"venue":"Empirical Software Engineering","venueShort":"EMSE","tags":["Testing","DNN model","Metamorphic 
Testing"],"abstract":"Deep Neural Network (DNN) models are widely used for image classification. While they offer high performance in terms of accuracy, researchers are concerned about if these models inappropriately make inferences using features irrelevant to the target object in a given image. To address this concern, we propose a metamorphic testing approach that assesses if a given inference is made based on irrelevant features. Specifically, we propose two metamorphic relations (MRs) to detect such unreliable inferences. These relations expect (a) the classification results with different labels or the same labels but less certainty from models after corrupting the relevant features of images, and (b) expect the classification results with the same labels after corrupting irrelevant features. The inferences that violate the metamorphic relations are regarded as unreliable inferences.\nOur evaluation demonstrated that our approach can effectively identify unreliable inferences for single-label classification models with an average precision of 64.1% and 96.4% for the two MRs, respectively. As for multi-label classification models, the corresponding precision for MR-1 and MR-2 is 78.2% and 86.5%, respectively. Further, we conducted an empirical study to understand the problem of unreliable inferences in practice. Specifically, we applied our approach to 18 pre-trained single-label image classification models and 3 multi-label classification models, and then examined their inferences on the ImageNet and COCO datasets. We found that unreliable inferences are pervasive. Specifically, for each model, more than thousands of correct classifications are actually made using irrelevant features. Next, we investigated the effect of such pervasive unreliable inferences, and found that they can cause significant degradation of a model's overall accuracy. After including these unreliable inferences from the test set, the model's accuracy can be significantly changed. 
Therefore, we recommend that developers should pay more attention to these unreliable inferences during the model evaluations. We also explored the correlation between model accuracy and the size of unreliable inferences. We found the inferences of the input with smaller objects are easier to be unreliable. Lastly, we found that the current model training methodologies can guide the models to learn object-relevant features to certain extent, but may not necessarily prevent the model from making unreliable inferences. We encourage the community to propose more effective training methodologies to address this issue.\n","projectUrl":"https://github.com/yqtianust/PaperUnreliableInference","paperUrl":"https://doi.org/10.1007/s10664-021-09985-1","bibtex":"@Article{Tian2021,\n author={Tian, Yongqiang\n and Ma, Shiqing\n and Wen, Ming\n and Liu, Yepang\n and Cheung, Shing-Chi\n and Zhang, Xiangyu},\n title={To what extent do DNN-based image classification models make unreliable inferences?},\n journal={Empirical Software Engineering},\n year={2021},\n month={Jun},\n day={18},\n volume={26},\n number={5},\n pages={84},\n abstract={Deep Neural Network (DNN) models are widely used for image classification. While they offer high performance in terms of accuracy, researchers are concerned about if these models inappropriately make inferences using features irrelevant to the target object in a given image. To address this concern, we propose a metamorphic testing approach that assesses if a given inference is made based on irrelevant features. Specifically, we propose two metamorphic relations (MRs) to detect such unreliable inferences. These relations expect (a) the classification results with different labels or the same labels but less certainty from models after corrupting the relevant features of images, and (b) the classification results with the same labels after corrupting irrelevant features. 
The inferences that violate the metamorphic relations are regarded as unreliable inferences. Our evaluation demonstrated that our approach can effectively identify unreliable inferences for single-label classification models with an average precision of 64.1{\\%} and 96.4{\\%} for the two MRs, respectively. As for multi-label classification models, the corresponding precision for MR-1 and MR-2 is 78.2{\\%} and 86.5{\\%}, respectively. Further, we conducted an empirical study to understand the problem of unreliable inferences in practice. Specifically, we applied our approach to 18 pre-trained single-label image classification models and 3 multi-label classification models, and then examined their inferences on the ImageNet and COCO datasets. We found that unreliable inferences are pervasive. Specifically, for each model, more than thousands of correct classifications are actually made using irrelevant features. Next, we investigated the effect of such pervasive unreliable inferences, and found that they can cause significant degradation of a model's overall accuracy. After including these unreliable inferences from the test set, the model's accuracy can be significantly changed. Therefore, we recommend that developers should pay more attention to these unreliable inferences during the model evaluations. We also explored the correlation between model accuracy and the size of unreliable inferences. We found the inferences of the input with smaller objects are easier to be unreliable. Lastly, we found that the current model training methodologies can guide the models to learn object-relevant features to certain extent, but may not necessarily prevent the model from making unreliable inferences. 
We encourage the community to propose more effective training methodologies to address this issue.},\n issn={1573-7616},\n doi={10.1007/s10664-021-09985-1},\n url={https://doi.org/10.1007/s10664-021-09985-1}\n }","arxivUrl":null,"slidesUrl":null,"awards":[]},{"title":"EvalDNN: a toolbox for evaluating deep neural network models","date":"2020","authors":["Yongqiang Tian","Zhihua Zeng","Ming Wen","Yepang Liu","Tzu-yang Kuo","Shing-Chi Cheung"],"venue":"42nd International Conference on Software Engineering, Demo","venueShort":"ICSE Demo","tags":["Testing","DNN model","Benchmark"],"projectUrl":"https://github.com/yqtianust/EvalDNN","paperUrl":"https://doi.org/10.1145/3377812.3382133","arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"A Comprehensive Study of Deep Learning Compiler Bugs","date":"2021","authors":["Qingchao Shen","Haoyang Ma","Junjie Chen","Yongqiang Tian","Shing-Chi Cheung","Xiang Chen"],"venue":"Proceedings of the 29th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE ’21)","venueShort":"ESEC/FSE","tags":["DL Compiler","Empirical Study"],"projectUrl":"https://github.com/ShenQingchao/DLCstudy","paperUrl":null,"arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"AdvDoor: Adversarial Backdoor Attack of Deep Learning System","date":"2021","authors":["Quan Zhang","Yifeng Ding","Yongqiang Tian","Jianmin Guo","Min Yuan","Yu Jiang"],"venue":"ACM SIGSOFT International Symposium on Software Testing and Analysis","venueShort":"ISSTA","tags":["DNN model","Backdoor Attack"],"projectUrl":"https://github.com/AdvDoor/AdvDoor","paperUrl":null,"arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"Finding Deviated Behaviors of the Compressed DNN Models for Image Classifications.","date":"2023","authors":["Yongqiang Tian","Wuqi Zhang","Ming Wen","Shing-Chi Cheung","Chengnian Sun","Shiqing Ma","Yu 
Jiang"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","tags":["DNN model"],"projectUrl":"https://dl.acm.org/doi/abs/10.1145/3583564","paperUrl":null,"arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"Revisiting the Evaluation of Deep Learning-Based Compiler Testing.","date":"2023","authors":["Yongqiang Tian","Zhenyang Xu","Yiwen Dong","Chengnian Sun","Shing-Chi Cheung"],"venue":"The 32nd International Joint Conference on Artificial Intelligence","venueShort":"IJCAI","tags":["Compiler testing"],"projectUrl":null,"paperUrl":null,"arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]},{"title":"On the Caching Schemes to Speed Up Program Reduction.","date":"2023","authors":["Yongqiang Tian","Xueyan Zhang","Yiwen Dong","Zhenyang Xu","Mengxiao Zhang","Yu Jiang","Shing-Chi Cheung","Chengnian Sun"],"venue":"ACM Transactions on Software Engineering and Methodology","venueShort":"TOSEM","tags":["Program Reduction"],"projectUrl":"https://github.com/uw-pluverse/perses/blob/master/doc/RCC.md","paperUrl":null,"arxivUrl":null,"abstract":null,"bibtex":null,"slidesUrl":null,"awards":[]}] \ No newline at end of file diff --git a/src/bundle.ts b/src/bundle.ts index 8c579b2..982ca2c 100644 --- a/src/bundle.ts +++ b/src/bundle.ts @@ -19,8 +19,8 @@ async function main() { return value; }), 'validate date string'), authors: Joi.array().min(1).items(Joi.string()).required(), - venue: Joi.string().min(1).required(), - venueShort: Joi.string().min(1).required(), + venue: Joi.string().allow('').required(), + venueShort: Joi.string().allow('').required(), tags: Joi.array().items(Joi.string()), awards: Joi.array().items(Joi.string()), paperUrl: Joi.string().allow(null, ''),