% Publication list for Xiaohui Chang and co-authors.
% Conventions: one field per line, authors as "Last, First", page ranges with
% a double hyphen, case-sensitive words in titles protected with braces.
%
% NOTE(review): entries 1969916 and 1984346 appear to describe the same paper
% (same authors, journal, year and abstract; the titles differ slightly).
% Both keys are kept so existing citations keep resolving; confirm the
% published title and consolidate.
%
% NOTE(review): the original export stored the year in the month field of
% every entry; those month fields were dropped because the real publication
% month is not recorded here.

@article{1969916,
  author   = {Li, Jiexun and Chang, Xiaohui},
  title    = {Combating False Information by Sharing the Truth: A Study on the Spread of Fact-checks on Social Media},
  journal  = {Information Systems Frontiers},
  year     = {2022},
  keywords = {Business Analytics},
  abstract = {Misinformation on social media has become a horrendous problem in our society. Fact-checks on information often fall behind the diffusion of misinformation, which can lead to negative impacts on society. This research studies how different factors may affect the spread of fact-checks over the internet. We collected a dataset of fact-checks in a six-month period and analyzed how they spread on Twitter. The spread of fact-checks is measured by the total retweet count. The factors/variables include the truthfulness rating, topic of information, source credibility, etc. The research identifies truthfulness rating as a significant factor: conclusive fact-checks (either true or false) tend to be shared more than others. In addition, the source credibility, political leaning, and the sharing count also affect the spread of fact-checks. The findings of this research provide practical insights into accelerating the spread of the truth in the battle against misinformation online.},
}

@article{1984346,
  author   = {Li, Jiexun and Chang, Xiaohui},
  title    = {Combating Misinformation by Sharing the Truth: A Study on the Spread of Fact-Checks on Social Media},
  journal  = {Information Systems Frontiers},
  year     = {2022},
  keywords = {Business Analytics},
  abstract = {Misinformation on social media has become a horrendous problem in our society. Fact-checks on information often fall behind the diffusion of misinformation, which can lead to negative impacts on society. This research studies how different factors may affect the spread of fact-checks over the internet. We collected a dataset of fact-checks in a six-month period and analyzed how they spread on Twitter. The spread of fact-checks is measured by the total retweet count. The factors/variables include the truthfulness rating, topic of information, source credibility, etc. The research identifies truthfulness rating as a significant factor: conclusive fact-checks (either true or false) tend to be shared more than others. In addition, the source credibility, political leaning, and the sharing count also affect the spread of fact-checks. The findings of this research provide practical insights into accelerating the spread of the truth in the battle against misinformation online.},
}

@misc{1969926,
  author   = {Chang, Xiaohui},
  title    = {Improving Student Engagement and Connection in Online Learning: Part {II}, Methodologies and Practices},
  year     = {2022},
  url      = {https://blogs.oregonstate.edu/inspire/2021/12/07/},
  keywords = {Business Analytics},
  abstract = {The first article in the series appeared last December. Since then, we have received plenty of feedback from other instructors who are actively engaged in online education. Almost all of them agreed that teaching well online remains a challenging task. In this article, I discussed six specific practices that I have found particularly helpful for online teaching and learning.

Practice 1: Adopt a variety of communication methods
Practice 2: Create a Q\&A Discussion Board
Practice 3: Estimate the amount of time taken for each assignment
Practice 4: Ensure timely replies
Practice 5: Synchronize assignments with the Canvas calendar
Practice 6: Reorganize course content},
}

@misc{1969921,
  author   = {Chang, Xiaohui},
  title    = {Improving Student Engagement and Connection in Online Learning through Proactive Support},
  year     = {2021},
  url      = {https://blogs.oregonstate.edu/inspire/2021/12/07/improving-student-engagement-and-connection-in-online-learning-through-proactive-support/},
  keywords = {Business Analytics},
  abstract = {Xiaohui Chang associate professor of Business Analytics, doesn{\textquoteright}t hold office hours. She holds "ask me anything hours" as a part of her methods to engage, connect and show empathy for her online students. Her first essay on the Ecampus teaching journey has great tips for all of our increased interactions in the virtual space.},
}

@article{1969936,
  author   = {Li, Jiexun and Chang, Xiaohui},
  title    = {Improving Mobile Health Apps Usage: A Quantitative Study on {mPower} Data of {Parkinson's} Disease},
  journal  = {Information Technology and People},
  volume   = {34},
  pages    = {399--420},
  year     = {2020},
  keywords = {Business Analytics},
  abstract = {Purpose
The emergence of mobile health (mHealth) products has created a capability of monitoring and managing the health of patients with chronic diseases. These mHealth technologies would not be beneficial unless they are adopted and used by their target users. This study identifies key factors affecting the usage of mHealth apps based on user usage data collected from an mHealth app.

Design/methodology/approach

Using a data set collected from an mHealth app named mPower, developed for patients with Parkinson{\textquoteright}s disease (PD), this paper investigated the effects of disease diagnosis, disease progression, and mHealth app difficulty level on app usage, while controlling for user information. App usage is measured by five different activity counts of the app.

Findings
The results across five measures of mHealth app usage vary slightly. On average, previous professional diagnosis and high user performance scores encourage user participation and engagement, while disease progression hinders app usage.

Research limitations/implications
The findings potentially provide insights into better design and promotion of mHealth products and improve the capability of health management of patients with chronic diseases.

Originality/value
Studies on the mHealth app usage are critical but sparse because large-scale and reliable mHealth app usage data are limited. Unlike earlier works based solely on survey data, this research used a large user usage data collected from an mHealth app to study key factors affecting app usage. The methods presented in this study can serve as a pioneering work for the design and promotion of mHealth technologies.},
}

@article{1969931,
  author   = {Liang, Decai and Zhang, Haozhe and Chang, Xiaohui and Huang, Hui},
  title    = {Modeling and Regionalization of {China's} {PM2.5} Using Spatial-Functional Mixture Models},
  journal  = {Journal of the American Statistical Association},
  volume   = {116},
  pages    = {116--132},
  year     = {2020},
  keywords = {Business Analytics},
}

@article{1969946,
  author   = {Elman, Miriam and Minnier, Jessica and Chang, Xiaohui and Choi, Dongseok},
  title    = {Noise Accumulation in High Dimensional Classification and Total Signal Index},
  journal  = {Journal of Machine Learning Research},
  volume   = {21},
  pages    = {1--23},
  year     = {2020},
  url      = {http://jmlr.org/papers/volume21/19-117/19-117.pdf},
  keywords = {Business Analytics},
  abstract = {Great attention has been paid to Big Data in recent years. Such data hold promise for scientific discoveries but also pose challenges to analyses. One potential challenge is noise accumulation. In this paper, we explore noise accumulation in high dimensional two-group classification. First, we revisit a previous assessment of noise accumulation with principal component analyses, which yields a different threshold for discriminative ability than originally identified. Then we extend our scope to its impact on classifiers developed with three common machine learning approaches{\textemdash}random forest, support vector machine, and boosted classification trees. We simulate four scenarios with differing amounts of signal strength to evaluate each method. After determining noise accumulation may affect the performance of these classifiers, we assess factors that impact it. We conduct simulations by varying sample size, signal strength, signal strength proportional to the number predictors, and signal magnitude with random forest classifiers. These simulations suggest that noise accumulation affects the discriminative ability of high-dimensional classifiers developed using common machine learning methods, which can be modified by sample size, signal strength, and signal magnitude. We developed the measure total signal index (TSI) to track the trends of total signal and noise accumulation.},
}

@article{1969941,
  author   = {Wang, Donghua and Xin, Yang and Chang, Xiaohui and Su, Xingze},
  title    = {Realized Volatility Forecasting and Volatility Spillovers: Evidence from {Chinese} Non-Ferrous Metals Futures},
  journal  = {International Journal of Finance and Economics},
  volume   = {26},
  pages    = {2713--2731},
  year     = {2020},
  keywords = {Business Analytics},
  abstract = {We study the prediction of realized volatility of non-ferrous metals futures traded on the Shanghai Futures Exchange from March 2011 to December 2017. A dynamic model averaging model is employed to combine multiple prediction models using time-varying weights based on individual model performance. Empirical results also reveal that models incorporating volatility spillovers across metals are important for forecast combinations, and short-term spillovers have a stronger impact than long-term spillovers. This approach offers the best forecasting performance and allows users to identify the most dominant model at any given time and demonstrate when and how volatility transmission from another metal is valuable for forecasting. We also find evidence of distinct trading behaviors in emerging and developed markets.},
}

@article{1969956,
  author   = {Chang, Xiaohui and Li, Jiexun},
  title    = {Business Performance Prediction in Location-based Social Commerce},
  journal  = {Expert Systems with Applications},
  volume   = {126},
  pages    = {112--123},
  year     = {2019},
  keywords = {Business Analytics},
  abstract = {Social commerce and location-based services provide a data platform for coexisting and competing businesses in geographical neighborhoods. Our research is aimed at mining data from such platforms to gain valuable insights for better support to strategic and operational business decisions. We develop a computational framework for predicting business performance that takes into account both intrinsic (e.g., attributes) and extrinsic (e.g., competitions) factors. Our experiments on synthetic and real datasets demonstrated superiority of a hybrid prediction model that adopts both link-based and context-based assumptions.},
}

@booklet{1969951,
  author   = {Chang, Xiaohui},
  title    = {Location-based Data on Social Commerce Platforms can Provide Insights for Business Decisions},
  address  = {Corvallis, OR},
  year     = {2019},
  keywords = {Business Analytics},
}

@article{1969966,
  author   = {Sun, Ying and Chang, Xiaohui and Guan, Yongtao},
  title    = {Flexible and Efficient Estimating Equations for Variogram Estimation},
  journal  = {Computational Statistics and Data Analysis},
  volume   = {122},
  pages    = {45--58},
  year     = {2018},
  keywords = {Business Analytics},
  abstract = {Variogram estimation plays a vastly important role in spatial modeling. Different methods for variogram estimation can be largely classified into least squares methods and likelihood based methods. A general framework to estimate the variogram through a set of estimating equations is proposed. This approach serves as an alternative approach to likelihood based methods and includes commonly used least squares approaches as its special cases. The proposed method is highly efficient as a low dimensional representation of the weight matrix is employed. The statistical efficiency of various estimators is explored and the lag effect is examined. An application to a hydrology data set is also presented.},
}

@misc{1969961,
  author   = {Hsieh, Ping-Hung and Chang, Xiaohui and Olstad, Andrew},
  title    = {Using a {Q} Matrix to Assess Students' Latent Skills in an Online Course},
  year     = {2018},
  url      = {https://ecampus.oregonstate.edu/research/publications/white-papers/},
  keywords = {Business Analytics, Supply Chain},
}

@article{1969971,
  author   = {Wang, Donghua and Tu, Jingqing and Chang, Xiaohui and Li, Saiping},
  title    = {The Lead-Lag Relationship between the Spot and Futures Markets in {China}},
  journal  = {Quantitative Finance},
  volume   = {17},
  pages    = {1447--1456},
  year     = {2017},
  keywords = {Business Analytics},
  abstract = {Based on daily and one-minute high-frequency returns, this paper examines the lead-lag dependence between the CSI 300 index spot and futures markets from 2010 to 2014. The nonparametric and nonlinear thermal optimal path method is adopted. Empirical results of the daily data indicate that the lead-lag relationship between the two markets is within one day but this relationship is volatile since neither of the two possible situations (the futures leads or lags behind the spot market) takes a dominant place. Besides, our results from high-frequency data demonstrate that there is a price discovery in the Chinese futures market: the intraday one-minute futures return leads the cash return by 0{\textasciitilde}5 minutes regardless of the price trend of the market.},
}

@misc{1976896,
  author   = {Hsieh, Ping-Hung and Chang, Xiaohui and Olstad, Andrew},
  title    = {Early Detection of Placement for Success in an Online Quantitative Class},
  address  = {Chicago, IL},
  year     = {2016},
  keywords = {Business Analytics, Supply Chain},
}

@article{1969981,
  author   = {Chang, Xiaohui and Waagepetersen, R. and Yu, H. and Ma, X. and Holford, T. R. and Wang, R. and Guan, Y.},
  title    = {Disease Risk Estimation by Combining Case-Control Data with Aggregated Information on the Population at Risk},
  journal  = {Biometrics},
  volume   = {71},
  pages    = {114--121},
  year     = {2015},
  keywords = {Business Analytics},
  abstract = {We propose a novel statistical framework by supplementing case-control data with summary statistics on the population at risk for a subset of risk factors. Our approach is to first form two unbiased estimating equations, one based on the case-control data and the other on both the case data and the summary statistics, and then optimally combine them to derive another estimating equation to be used for the estimation. The proposed method is computationally simple and more efficient than standard approaches based on case-control data alone. We also establish asymptotic properties of the resulting estimator, and investigate its finite-sample performance through simulation. As a substantive application, we apply the proposed method to investigate risk factors for endometrial cancer, by using data from a recently completed population-based case-control study and summary statistics from the Behavioral Risk Factor Surveillance System, the Population Estimates Program of the US Census Bureau, and the Connecticut Department of Transportation.},
}

@article{1969976,
  author   = {Wang, Donghua and Lei, Man and Chang, Xiaohui},
  title    = {Dynamic relation of {Chinese} stock price-volume pre- and post- the {Split Share Structure Reform}: New evidence from a two-state {Markov}-switching approach},
  journal  = {China Finance Review International},
  volume   = {5},
  pages    = {386--401},
  year     = {2015},
  keywords = {Business Analytics},
  abstract = {Purpose {\textendash} The purpose of this paper is to identify the bull and bear regimes in Chinese stock market and empirically analyze the dynamic relation of Chinese stock price-volume pre- and post- the Split Share Structure Reform.

Design/methodology/approach {\textendash} The authors investigate the price-volume relationship in the Chinese stock market before and after the Split Share Structure Reform using Shanghai Composite Index daily data from July 1994 to April 2013. Using a two-state Markov-switching autoregressive model and a modified two-state Markov-switching vector autoregression model, this study identifies bull or bear market and also examine the existence of regime-dependent Granger causality.

Findings {\textendash} Using a two-state Markov-switching autoregressive model, the authors detect structural changes in the market volatility due to the reform, and find evidence of a positive rather than an asymmetric price-volume contemporaneous correlation. There is a strong dynamic Granger causal relation from stock returns to trading volume before and after the reform regardless of the market conditions, but the causal effects of volume on returns are only seen in the bear markets before the reform. The model is robust when using different stock indices and time periods.

Originality/value {\textendash} The work is different from previous studies in the following aspects: most of the existing empirical literature focus on the well-developed economies, but our interest lies in the emerging Chinese market that has witnessed rapid growth in the past decade; in contrast to many works in the literature that examine the price-volume relationship during one market condition, the authors compare the relationship in a bull market with that in a bear market, using a two-state MS-AR model; the authors also employ a modified two-state Markov-switching vector autoregression model to examine the existence of regime-dependent Granger causality; as the most massive systematic reform for the Chinese stock market since its inception in 2005, the Split Share Structure Reform has a profound impact on the Chinese stock market, thus it is of vital importance to explore its effects on both the price-volume relationship and the market structure.},
}

@article{1969991,
  author   = {Chang, Xiaohui and Stein, Michael L.},
  title    = {Wavelet Methods in Interpolation of High-Frequency Spatial-Temporal Pressure},
  journal  = {Spatial Statistics},
  volume   = {8},
  pages    = {52--68},
  year     = {2014},
  keywords = {Business Analytics},
  abstract = {The location-scale and whitening properties of wavelets make them more favorable for interpolating high-frequency monitoring data than Fourier-based methods. In the past, wavelets have been used to simplify the dependence structure in multiple time or spatial series, but little has been done to apply wavelets as a modeling tool in a space{\textendash}time setting, or, in particular, to take advantage of the localization of wavelets to capture the local dynamic characteristics of high-frequency meteorological data. This paper analyzes minute-by-minute atmospheric pressure data from the Atmospheric Radiation Measurement program using different wavelet coefficient structures at different scales and incorporating spatial structure into the model. This approach of modeling space{\textendash}time processes using wavelets produces accurate point predictions with low uncertainty estimates, and also enables interpolation of available data from sparse monitoring stations to a high density grid and production of meteorological maps on large spatial and temporal scales.},
}

@article{1969986,
  author   = {Chang, Xiaohui and Stein, Michael L.},
  title    = {Decorrelation Property of Discrete Wavelet Transform Under Fixed-Domain Asymptotics},
  journal  = {{IEEE} Transactions on Information Theory},
  volume   = {59},
  pages    = {8001--8013},
  year     = {2013},
  keywords = {Business Analytics},
  abstract = {Theoretical aspects of the decorrelation property of the discrete wavelet transform when applied to stochastic processes have been studied exclusively from the increasing-domain perspective, in which the distance between neighboring observations stays roughly constant as the number of observations increases. To understand the underlying data-generating process and to obtain good interpolations, fixed-domain asymptotics, in which the number of observations increases in a fixed region, is often more appropriate than increasing-domain asymptotics. In the fixed-domain setting, we prove that, for a general class of inhomogeneous covariance functions, with suitable choice of wavelet filters, the wavelet transform of a nonstationary process has mostly asymptotically uncorrelated components.},
}

@article{1984341,
  author   = {Chang, Xiaohui and Chen, Yewen and Huang, Hui and Luo, Fangzhi},
  title    = {Additive Dynamic Models for Correcting Numerical Model Outputs},
  journal  = {Computational Statistics and Data Analysis},
  year     = {2023},
  note     = {In press},
  keywords = {Business Analytics},
  abstract = {Numerical air quality models are pivotal for the prediction and assessment of air pollution, but numerical model outputs may be systematically biased. An additive dynamic model is proposed to correct large-scale raw model outputs using data from other sources, including readings collected at ground monitoring networks and weather outputs from other numerical models. An additive partially linear model specification is employed for the nonlinear relationships between air pollutants and covariates. In addition, a multi-resolution basis function approximate is proposed to capture the different small-scale variations of biases, and a discretized stochastic integro-differential equation is constructed to characterize the dynamic evolution of the random coefficients at each spatial resolution. An expectation-maximization algorithm is developed for parameter estimation and a multi-resolution ensemble-based scheme is embedded to accelerate the computation. For statistical inference, a conditional simulation technique is applied to quantify the uncertainty of parameter estimates and bias correction results. The proposed approach is used to correct the biased raw outputs of PM2.5 from the Community Multiscale Air Quality (CMAQ) system for China{\textquoteright}s Beijing-Tianjin-Hebei region. Our method improves the root mean squared error and continuous rank probability score by 43.70\% and 34.76\%, respectively. Compared to other statistical methods under different metrics, our model has advantages in both correction accuracy and computational efficiency.},
}